From 8952059b22c286ecb03dfe97cfc2342243bd49c0 Mon Sep 17 00:00:00 2001
From: Soby Chacko <soby.chacko@broadcom.com>
Date: Fri, 13 Mar 2026 11:27:50 -0400
Subject: [PATCH] Replace RestClient-based Anthropic module with official Java
 SDK

Rewrite the Anthropic integration from the ground up on the official
Anthropic Java SDK (v2.16.1), replacing the hand-rolled RestClient
implementation.

Core capabilities:

  - Sync and streaming chat with typed SDK response objects
  - Tool calling with JsonValue visitor pattern for schema conversion
  - Streaming tool calling with partial JSON accumulation and recursive
    multi-turn execution
  - Multi-modal inputs (base64 images, HTTPS URLs, PDF documents)
  - Extended thinking (ThinkingBlock, RedactedThinkingBlock) with
    budget control and adaptive mode
  - Citations across plain text, PDF, and custom content documents
    with three location types (char, page, content block)
  - Citation consistency validation on build to reject mixed settings
  - Prompt caching with 5 strategies, configurable TTL, content length
    thresholds, and multi-block system message support
  - Structured output via OutputConfig with JSON schema enforcement
    and effort control (LOW/MEDIUM/HIGH/MAX)
  - Per-request HTTP headers for beta features and request tracking
  - Claude Skills (XLSX, PPTX, DOCX, PDF) with Files API for
    downloading generated documents
  - disableParallelToolUse option wired into the ToolChoice subtypes
  - Expose underlying SDK clients via getAnthropicClient() and
    getAnthropicClientAsync() for direct access to SDK features

Infrastructure:

  - Builder aligned with DefaultToolCallingChatOptions.Builder, using
    self-type generics and the mutate()/combineWith() pattern
  - SDK-native retry (no Spring RetryTemplate), matching spring-ai-openai-sdk
  - Raw SDK Message stored in metadata under "anthropic-response" key
  - Observation/tracing support with Micrometer for both sync and
    streaming paths
  - Default model: claude-haiku-4-5, default max tokens: 4096
  - Auto-configuration and starter modules
  - Comprehensive unit tests (55+) and integration tests (35+)
  - Reference documentation with configuration properties

Signed-off-by: Soby Chacko <soby.chacko@broadcom.com>
---
 .../pom.xml                                   |   18 -
 .../AnthropicChatAutoConfiguration.java       |   77 +-
 .../AnthropicChatProperties.java              |   20 +-
 .../AnthropicConnectionProperties.java        |   76 +-
 .../StringToToolChoiceConverter.java          |   42 -
 .../autoconfigure}/package-info.java          |    2 +-
 .../AnthropicChatAutoConfigurationIT.java     |   39 +-
 .../AnthropicModelConfigurationTests.java     |    6 +-
 .../AnthropicPropertiesTests.java             |   86 +-
 .../autoconfigure/BaseAnthropicIT.java        |   36 -
 .../tool/FunctionCallWithFunctionBeanIT.java  |   26 +-
 .../FunctionCallWithPromptFunctionIT.java     |   41 +-
 models/spring-ai-anthropic/README.md          |  282 +-
 models/spring-ai-anthropic/pom.xml            |   57 +-
 .../anthropic/AbstractAnthropicOptions.java   |  133 +
 .../{api => }/AnthropicCacheOptions.java      |   77 +-
 .../ai/anthropic/AnthropicCacheStrategy.java  |   60 +
 .../{api => }/AnthropicCacheTtl.java          |   30 +-
 .../ai/anthropic/AnthropicChatModel.java      | 2040 ++++++++------
 .../ai/anthropic/AnthropicChatOptions.java    |  807 +++---
 ...nt.java => AnthropicCitationDocument.java} |  167 +-
 .../ai/anthropic/AnthropicSetup.java          |  237 ++
 .../ai/anthropic/AnthropicSkill.java          |  108 +
 .../ai/anthropic/AnthropicSkillContainer.java |  160 ++
 .../ai/anthropic/AnthropicSkillRecord.java    |  119 +
 ...kage-info.java => AnthropicSkillType.java} |   32 +-
 .../AnthropicSkillsResponseHelper.java        |  169 +-
 .../utils => }/CacheBreakpointTracker.java    |    4 +-
 .../utils => }/CacheEligibilityResolver.java  |   61 +-
 .../ai/anthropic/Citation.java                |   17 +-
 .../anthropic/aot/AnthropicRuntimeHints.java  |   45 -
 .../ai/anthropic/aot/package-info.java        |   20 -
 .../ai/anthropic/api/AnthropicApi.java        | 2433 -----------------
 .../anthropic/api/AnthropicCacheStrategy.java |  126 -
 .../ai/anthropic/api/AnthropicCacheType.java  |   70 -
 .../ai/anthropic/api/StreamHelper.java        |  323 ---
 .../ai/anthropic/api/utils/package-info.java  |   20 -
 .../metadata/AnthropicRateLimit.java          |   92 -
 .../ai/anthropic/package-info.java            |   34 +
 .../resources/META-INF/spring/aot.factories   |    2 -
 .../{api => }/AnthropicCacheOptionsTests.java |   41 +-
 ...ropicChatModelAdditionalHttpHeadersIT.java |   77 -
 .../AnthropicChatModelSkillsTests.java        |  295 --
 .../ai/anthropic/AnthropicChatModelTests.java |  411 +++
 .../AnthropicChatOptionsSkillsTests.java      |  206 --
 .../anthropic/AnthropicChatOptionsTests.java  |  766 +++---
 .../ai/anthropic/AnthropicCitationIT.java     |  301 --
 .../anthropic/AnthropicPromptCachingIT.java   |  615 -----
 .../AnthropicPromptCachingMockTest.java       |  804 ------
 .../ai/anthropic/AnthropicSkillsIT.java       |  173 +-
 .../AnthropicSkillsResponseHelperTests.java   |  231 +-
 .../anthropic/AnthropicTestConfiguration.java |   25 +-
 .../CacheEligibilityResolverTests.java        |  125 +-
 .../anthropic/ChatCompletionRequestTests.java |  129 -
 .../ai/anthropic/EventParsingTests.java       |   56 -
 .../aot/AnthropicRuntimeHintsTests.java       |  230 --
 .../api/AnthropicApiBuilderTests.java         |  423 ---
 .../anthropic/api/AnthropicApiFilesTests.java |  130 -
 .../ai/anthropic/api/AnthropicApiIT.java      |  366 ---
 .../anthropic/api/AnthropicApiSkillTests.java |  173 --
 ...letionRequestSkillsSerializationTests.java |  161 --
 .../ai/anthropic/api/StreamHelperTests.java   |  522 ----
 .../api/tool/AnthropicApiToolIT.java          |  159 --
 .../{ => chat}/AnthropicChatModelIT.java      |  670 +++--
 .../AnthropicChatModelObservationIT.java      |   46 +-
 .../chat/AnthropicPromptCachingIT.java        |  403 +++
 .../tool => chat}/MockWeatherService.java     |    6 +-
 .../client/AnthropicChatClientIT.java         |  403 ---
 ...lientMethodInvokingFunctionCallbackIT.java |  379 ---
 .../client/AnthropicToolCallAdvisorIT.java    |   59 -
 ...ClientToolsWithGenericArgumentTypesIT.java |  103 -
 .../application-logging-test.properties       |   19 -
 .../test/resources/prompts/system-message.st  |    2 +-
 .../src/test/resources/sample_events.json     |  243 --
 pom.xml                                       |    5 +-
 .../ROOT/pages/api/chat/anthropic-chat.adoc   | 2353 +++++-----------
 .../spring-ai-starter-model-anthropic/pom.xml |   12 +-
 77 files changed, 5799 insertions(+), 13517 deletions(-)
 delete mode 100644 auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/StringToToolChoiceConverter.java
 rename {models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/metadata => auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure}/package-info.java (91%)
 delete mode 100644 auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/BaseAnthropicIT.java
 create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AbstractAnthropicOptions.java
 rename models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/{api => }/AnthropicCacheOptions.java (73%)
 create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheStrategy.java
 rename models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/{api => }/AnthropicCacheTtl.java (62%)
 rename models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/{api/CitationDocument.java => AnthropicCitationDocument.java} (50%)
 create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSetup.java
 create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkill.java
 create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillContainer.java
 create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillRecord.java
 rename models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/{api/package-info.java => AnthropicSkillType.java} (56%)
 rename models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/{api/utils => }/CacheBreakpointTracker.java (95%)
 rename models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/{api/utils => }/CacheEligibilityResolver.java (62%)
 delete mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/aot/AnthropicRuntimeHints.java
 delete mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/aot/package-info.java
 delete mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java
 delete mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
 delete mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java
 delete mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java
 delete mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/package-info.java
 delete mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/metadata/AnthropicRateLimit.java
 delete mode 100644 models/spring-ai-anthropic/src/main/resources/META-INF/spring/aot.factories
 rename models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/{api => }/AnthropicCacheOptionsTests.java (59%)
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelAdditionalHttpHeadersIT.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelSkillsTests.java
 create mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsSkillsTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicCitationIT.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingIT.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
 rename models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/{api/utils => }/CacheEligibilityResolverTests.java (68%)
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/ChatCompletionRequestTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/EventParsingTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/aot/AnthropicRuntimeHintsTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiBuilderTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiFilesTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiSkillTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/ChatCompletionRequestSkillsSerializationTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/StreamHelperTests.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/tool/AnthropicApiToolIT.java
 rename models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/{ => chat}/AnthropicChatModelIT.java (54%)
 rename models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/{ => chat}/AnthropicChatModelObservationIT.java (81%)
 create mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicPromptCachingIT.java
 rename models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/{api/tool => chat}/MockWeatherService.java (94%)
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientIT.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientMethodInvokingFunctionCallbackIT.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicToolCallAdvisorIT.java
 delete mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/ChatClientToolsWithGenericArgumentTypesIT.java
 delete mode 100644 models/spring-ai-anthropic/src/test/resources/application-logging-test.properties
 delete mode 100644 models/spring-ai-anthropic/src/test/resources/sample_events.json

diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/pom.xml b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/pom.xml
index 41ab7c58975..62326557ed0 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/pom.xml
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/pom.xml
@@ -30,7 +30,6 @@
 			<groupId>org.springframework.ai</groupId>
 			<artifactId>spring-ai-anthropic</artifactId>
 			<version>${project.parent.version}</version>
-			<optional>true</optional>
 		</dependency>
 
 		<!-- Spring AI auto configurations -->
@@ -41,12 +40,6 @@
 			<version>${project.parent.version}</version>
 		</dependency>
 
-		<dependency>
-			<groupId>org.springframework.ai</groupId>
-			<artifactId>spring-ai-autoconfigure-retry</artifactId>
-			<version>${project.parent.version}</version>
-		</dependency>
-
 		<dependency>
 			<groupId>org.springframework.ai</groupId>
 			<artifactId>spring-ai-autoconfigure-model-chat-observation</artifactId>
@@ -60,17 +53,6 @@
 			<optional>true</optional>
 		</dependency>
 
-		<dependency>
-			<groupId>org.springframework.boot</groupId>
-			<artifactId>spring-boot-starter-webclient</artifactId>
-			<optional>true</optional>
-		</dependency>
-		<dependency>
-			<groupId>org.springframework.boot</groupId>
-			<artifactId>spring-boot-starter-restclient</artifactId>
-			<optional>true</optional>
-		</dependency>
-
 		<dependency>
 			<groupId>org.springframework.boot</groupId>
 			<artifactId>spring-boot-configuration-processor</artifactId>
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatAutoConfiguration.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatAutoConfiguration.java
index 21e8be122fb..a14d5c41b44 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatAutoConfiguration.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatAutoConfiguration.java
@@ -16,10 +16,11 @@
 
 package org.springframework.ai.model.anthropic.autoconfigure;
 
+import com.anthropic.client.AnthropicClient;
 import io.micrometer.observation.ObservationRegistry;
 
 import org.springframework.ai.anthropic.AnthropicChatModel;
-import org.springframework.ai.anthropic.api.AnthropicApi;
+import org.springframework.ai.anthropic.AnthropicChatOptions;
 import org.springframework.ai.chat.observation.ChatModelObservationConvention;
 import org.springframework.ai.model.SpringAIModelProperties;
 import org.springframework.ai.model.SpringAIModels;
@@ -27,77 +28,61 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.autoconfigure.ToolCallingAutoConfiguration;
-import org.springframework.ai.retry.RetryUtils;
-import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration;
 import org.springframework.beans.factory.ObjectProvider;
 import org.springframework.boot.autoconfigure.AutoConfiguration;
 import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
 import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
 import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
 import org.springframework.boot.context.properties.EnableConfigurationProperties;
-import org.springframework.boot.restclient.autoconfigure.RestClientAutoConfiguration;
-import org.springframework.boot.webclient.autoconfigure.WebClientAutoConfiguration;
 import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Import;
-import org.springframework.core.retry.RetryTemplate;
-import org.springframework.web.client.ResponseErrorHandler;
-import org.springframework.web.client.RestClient;
-import org.springframework.web.reactive.function.client.WebClient;
 
 /**
  * {@link AutoConfiguration Auto-configuration} for Anthropic Chat Model.
  *
- * @author Christian Tzolov
- * @author Thomas Vitale
- * @author Ilayaperumal Gopinathan
- * @author Hyoseop Song
- * @author Yanming Zhou
- * @since 1.0.0
+ * @author Soby Chacko
+ * @since 2.0.0
  */
-@AutoConfiguration(after = { RestClientAutoConfiguration.class, WebClientAutoConfiguration.class,
-		ToolCallingAutoConfiguration.class, SpringAiRetryAutoConfiguration.class })
-@EnableConfigurationProperties({ AnthropicChatProperties.class, AnthropicConnectionProperties.class })
-@ConditionalOnClass(AnthropicApi.class)
+@AutoConfiguration(after = { ToolCallingAutoConfiguration.class })
+@EnableConfigurationProperties({ AnthropicConnectionProperties.class, AnthropicChatProperties.class })
+@ConditionalOnClass(AnthropicClient.class)
 @ConditionalOnProperty(name = SpringAIModelProperties.CHAT_MODEL, havingValue = SpringAIModels.ANTHROPIC,
 		matchIfMissing = true)
-@Import(StringToToolChoiceConverter.class)
 public class AnthropicChatAutoConfiguration {
 
 	@Bean
 	@ConditionalOnMissingBean
-	public AnthropicApi anthropicApi(AnthropicConnectionProperties connectionProperties,
-			ObjectProvider<RestClient.Builder> restClientBuilderProvider,
-			ObjectProvider<WebClient.Builder> webClientBuilderProvider,
-			ObjectProvider<ResponseErrorHandler> responseErrorHandler) {
-
-		return AnthropicApi.builder()
-			.baseUrl(connectionProperties.getBaseUrl())
-			.completionsPath(connectionProperties.getCompletionsPath())
-			.apiKey(connectionProperties.getApiKey())
-			.anthropicVersion(connectionProperties.getVersion())
-			.restClientBuilder(restClientBuilderProvider.getIfAvailable(RestClient::builder))
-			.webClientBuilder(webClientBuilderProvider.getIfAvailable(WebClient::builder))
-			.responseErrorHandler(responseErrorHandler.getIfAvailable(() -> RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER))
-			.anthropicBetaFeatures(connectionProperties.getBetaVersion())
-			.build();
-	}
-
-	@Bean
-	@ConditionalOnMissingBean
-	public AnthropicChatModel anthropicChatModel(AnthropicApi anthropicApi, AnthropicChatProperties chatProperties,
-			ObjectProvider<RetryTemplate> retryTemplate, ToolCallingManager toolCallingManager,
+	public AnthropicChatModel anthropicChatModel(AnthropicConnectionProperties connectionProperties,
+			AnthropicChatProperties chatProperties, ToolCallingManager toolCallingManager,
 			ObjectProvider<ObservationRegistry> observationRegistry,
 			ObjectProvider<ChatModelObservationConvention> observationConvention,
 			ObjectProvider<ToolExecutionEligibilityPredicate> anthropicToolExecutionEligibilityPredicate) {
 
+		AnthropicChatOptions options = chatProperties.getOptions();
+		if (connectionProperties.getApiKey() != null) {
+			options.setApiKey(connectionProperties.getApiKey());
+		}
+		if (connectionProperties.getBaseUrl() != null) {
+			options.setBaseUrl(connectionProperties.getBaseUrl());
+		}
+		if (connectionProperties.getTimeout() != null) {
+			options.setTimeout(connectionProperties.getTimeout());
+		}
+		if (connectionProperties.getMaxRetries() != null) {
+			options.setMaxRetries(connectionProperties.getMaxRetries());
+		}
+		if (connectionProperties.getProxy() != null) {
+			options.setProxy(connectionProperties.getProxy());
+		}
+		if (!connectionProperties.getCustomHeaders().isEmpty()) {
+			options.setCustomHeaders(connectionProperties.getCustomHeaders());
+		}
+
 		var chatModel = AnthropicChatModel.builder()
-			.anthropicApi(anthropicApi)
-			.defaultOptions(chatProperties.getOptions())
+			.options(options)
 			.toolCallingManager(toolCallingManager)
+			.observationRegistry(observationRegistry.getIfUnique(() -> ObservationRegistry.NOOP))
 			.toolExecutionEligibilityPredicate(anthropicToolExecutionEligibilityPredicate
 				.getIfUnique(DefaultToolExecutionEligibilityPredicate::new))
-			.retryTemplate(retryTemplate.getIfUnique(() -> RetryUtils.DEFAULT_RETRY_TEMPLATE))
-			.observationRegistry(observationRegistry.getIfUnique(() -> ObservationRegistry.NOOP))
 			.build();
 
 		observationConvention.ifAvailable(chatModel::setObservationConvention);
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatProperties.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatProperties.java
index d43fd55e1bb..76aca2577f5 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatProperties.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatProperties.java
@@ -16,7 +16,7 @@
 
 package org.springframework.ai.model.anthropic.autoconfigure;
 
-import org.springframework.ai.anthropic.AnthropicChatModel;
+import org.springframework.ai.anthropic.AbstractAnthropicOptions;
 import org.springframework.ai.anthropic.AnthropicChatOptions;
 import org.springframework.boot.context.properties.ConfigurationProperties;
 import org.springframework.boot.context.properties.NestedConfigurationProperty;
@@ -24,24 +24,20 @@
 /**
  * Anthropic Chat autoconfiguration properties.
  *
- * @author Christian Tzolov
- * @author Alexandros Pappas
- * @since 1.0.0
+ * @author Soby Chacko
+ * @since 2.0.0
  */
 @ConfigurationProperties(AnthropicChatProperties.CONFIG_PREFIX)
-public class AnthropicChatProperties {
+public class AnthropicChatProperties extends AbstractAnthropicOptions {
 
 	public static final String CONFIG_PREFIX = "spring.ai.anthropic.chat";
 
-	/**
-	 * Client lever Ollama options. Use this property to configure generative temperature,
-	 * topK and topP and alike parameters. The null values are ignored defaulting to the
-	 * generative's defaults.
-	 */
+	public static final String DEFAULT_CHAT_MODEL = AnthropicChatOptions.DEFAULT_MODEL;
+
 	@NestedConfigurationProperty
 	private final AnthropicChatOptions options = AnthropicChatOptions.builder()
-		.model(AnthropicChatModel.DEFAULT_MODEL_NAME)
-		.maxTokens(AnthropicChatModel.DEFAULT_MAX_TOKENS)
+		.model(DEFAULT_CHAT_MODEL)
+		.maxTokens(AnthropicChatOptions.DEFAULT_MAX_TOKENS)
 		.build();
 
 	public AnthropicChatOptions getOptions() {
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicConnectionProperties.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicConnectionProperties.java
index 8968da46d69..a25195c1d09 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicConnectionProperties.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicConnectionProperties.java
@@ -16,84 +16,18 @@
 
 package org.springframework.ai.model.anthropic.autoconfigure;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
+import org.springframework.ai.anthropic.AbstractAnthropicOptions;
 import org.springframework.boot.context.properties.ConfigurationProperties;
 
 /**
- * Anthropic API connection properties.
+ * Anthropic connection properties.
  *
- * @author Christian Tzolov
- * @since 1.0.0
+ * @author Soby Chacko
+ * @since 2.0.0
  */
 @ConfigurationProperties(AnthropicConnectionProperties.CONFIG_PREFIX)
-public class AnthropicConnectionProperties {
+public class AnthropicConnectionProperties extends AbstractAnthropicOptions {
 
 	public static final String CONFIG_PREFIX = "spring.ai.anthropic";
 
-	/**
-	 * Anthropic API access key.
-	 */
-	private String apiKey;
-
-	/**
-	 * Anthropic API base URL.
-	 */
-	private String baseUrl = AnthropicApi.DEFAULT_BASE_URL;
-
-	/**
-	 * Path to append to the base URL
-	 */
-	private String completionsPath = AnthropicApi.DEFAULT_MESSAGE_COMPLETIONS_PATH;
-
-	/**
-	 * Anthropic API version.
-	 */
-	private String version = AnthropicApi.DEFAULT_ANTHROPIC_VERSION;
-
-	/**
-	 * Beta features version. Such as tools-2024-04-04 or
-	 * max-tokens-3-5-sonnet-2024-07-15.
-	 */
-	private String betaVersion = AnthropicApi.DEFAULT_ANTHROPIC_BETA_VERSION;
-
-	public String getApiKey() {
-		return this.apiKey;
-	}
-
-	public void setApiKey(String apiKey) {
-		this.apiKey = apiKey;
-	}
-
-	public String getBaseUrl() {
-		return this.baseUrl;
-	}
-
-	public void setBaseUrl(String baseUrl) {
-		this.baseUrl = baseUrl;
-	}
-
-	public String getCompletionsPath() {
-		return this.completionsPath;
-	}
-
-	public void setCompletionsPath(String completionsPath) {
-		this.completionsPath = completionsPath;
-	}
-
-	public String getVersion() {
-		return this.version;
-	}
-
-	public void setVersion(String version) {
-		this.version = version;
-	}
-
-	public String getBetaVersion() {
-		return this.betaVersion;
-	}
-
-	public void setBetaVersion(String betaVersion) {
-		this.betaVersion = betaVersion;
-	}
-
 }
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/StringToToolChoiceConverter.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/StringToToolChoiceConverter.java
deleted file mode 100644
index 5c05d73376b..00000000000
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/StringToToolChoiceConverter.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.model.anthropic.autoconfigure;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.model.ModelOptionsUtils;
-import org.springframework.boot.context.properties.ConfigurationPropertiesBinding;
-import org.springframework.core.convert.converter.Converter;
-import org.springframework.stereotype.Component;
-
-/**
- * Converter to deserialize JSON string into {@link AnthropicApi.ToolChoice}. This
- * converter is used by Spring Boot's configuration properties binding to convert string
- * values from application properties into ToolChoice objects.
- *
- * @author Christian Tzolov
- * @since 1.0.0
- */
-@Component
-@ConfigurationPropertiesBinding
-public class StringToToolChoiceConverter implements Converter<String, AnthropicApi.ToolChoice> {
-
-	@Override
-	public AnthropicApi.ToolChoice convert(String source) {
-		return ModelOptionsUtils.jsonToObject(source, AnthropicApi.ToolChoice.class);
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/metadata/package-info.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/package-info.java
similarity index 91%
rename from models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/metadata/package-info.java
rename to auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/package-info.java
index 7ea11a276c1..3bd1ba067b3 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/metadata/package-info.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/main/java/org/springframework/ai/model/anthropic/autoconfigure/package-info.java
@@ -15,6 +15,6 @@
  */
 
 @NullMarked
-package org.springframework.ai.anthropic.metadata;
+package org.springframework.ai.model.anthropic.autoconfigure;
 
 import org.jspecify.annotations.NullMarked;
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatAutoConfigurationIT.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatAutoConfigurationIT.java
index 5bc563d6020..57239de5590 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatAutoConfigurationIT.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicChatAutoConfigurationIT.java
@@ -19,15 +19,14 @@
 import java.util.List;
 import java.util.stream.Collectors;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import reactor.core.publisher.Flux;
 
 import org.springframework.ai.anthropic.AnthropicChatModel;
 import org.springframework.ai.anthropic.AnthropicChatOptions;
-import org.springframework.ai.anthropic.api.AnthropicApi;
 import org.springframework.ai.chat.messages.AssistantMessage;
 import org.springframework.ai.chat.messages.UserMessage;
 import org.springframework.ai.chat.model.ChatResponse;
@@ -38,13 +37,18 @@
 
 import static org.assertj.core.api.Assertions.assertThat;
 
+/**
+ * Integration tests for {@link AnthropicChatAutoConfiguration}.
+ *
+ * @author Soby Chacko
+ */
 @EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".*")
-public class AnthropicChatAutoConfigurationIT {
+class AnthropicChatAutoConfigurationIT {
 
-	private static final Log logger = LogFactory.getLog(AnthropicChatAutoConfigurationIT.class);
+	private static final Logger logger = LoggerFactory.getLogger(AnthropicChatAutoConfigurationIT.class);
 
 	private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
-		.withPropertyValues("spring.ai.anthropic.apiKey=" + System.getenv("ANTHROPIC_API_KEY"))
+		.withPropertyValues("spring.ai.anthropic.api-key=" + System.getenv("ANTHROPIC_API_KEY"))
 		.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class));
 
 	@Test
@@ -53,22 +57,19 @@ void call() {
 			AnthropicChatModel chatModel = context.getBean(AnthropicChatModel.class);
 			String response = chatModel.call("Hello");
 			assertThat(response).isNotEmpty();
-			logger.info("Response: " + response);
+			logger.info("Response: {}", response);
 		});
 	}
 
 	@Test
-	void callWith8KResponseContext() {
-		this.contextRunner
-			.withPropertyValues(
-					"spring.ai.anthropic.chat.options.model=" + AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
-			.run(context -> {
-				AnthropicChatModel chatModel = context.getBean(AnthropicChatModel.class);
-				var options = AnthropicChatOptions.builder().maxTokens(8192).build();
-				var response = chatModel.call(new Prompt("Tell me a joke", options));
-				assertThat(response.getResult().getOutput().getText()).isNotEmpty();
-				logger.info("Response: " + response);
-			});
+	void callWithOptions() {
+		this.contextRunner.run(context -> {
+			AnthropicChatModel chatModel = context.getBean(AnthropicChatModel.class);
+			var options = AnthropicChatOptions.builder().maxTokens(100).build();
+			var response = chatModel.call(new Prompt("Tell me a joke", options));
+			assertThat(response.getResult().getOutput().getText()).isNotEmpty();
+			logger.info("Response: {}", response);
+		});
 	}
 
 	@Test
@@ -87,7 +88,7 @@ void stream() {
 				.collect(Collectors.joining());
 
 			assertThat(response).isNotEmpty();
-			logger.info("Response: " + response);
+			logger.info("Response: {}", response);
 		});
 	}
 
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicModelConfigurationTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicModelConfigurationTests.java
index 35a8b4c276f..a2af7f3cfcb 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicModelConfigurationTests.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicModelConfigurationTests.java
@@ -27,12 +27,12 @@
 /**
  * Unit Tests for {@link AnthropicChatAutoConfiguration}'s conditional enabling of models.
  *
- * @author Ilayaperumal Gopinathan
+ * @author Soby Chacko
  */
-public class AnthropicModelConfigurationTests {
+class AnthropicModelConfigurationTests {
 
 	private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
-		.withPropertyValues("spring.ai.anthropic.apiKey=" + System.getenv("ANTHROPIC_API_KEY"))
+		.withPropertyValues("spring.ai.anthropic.api-key=" + System.getenv("ANTHROPIC_API_KEY"))
 		.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class));
 
 	@Test
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicPropertiesTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicPropertiesTests.java
index b07d64db8a8..2111334eeb8 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicPropertiesTests.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/AnthropicPropertiesTests.java
@@ -19,32 +19,26 @@
 import org.junit.jupiter.api.Test;
 
 import org.springframework.ai.anthropic.AnthropicChatModel;
-import org.springframework.ai.anthropic.api.AnthropicApi.ToolChoiceTool;
 import org.springframework.ai.utils.SpringAiTestAutoConfigurations;
 import org.springframework.boot.autoconfigure.AutoConfigurations;
-import org.springframework.boot.restclient.autoconfigure.RestClientAutoConfiguration;
 import org.springframework.boot.test.context.runner.ApplicationContextRunner;
 
 import static org.assertj.core.api.Assertions.assertThat;
 
 /**
- * Unit Tests for {@link AnthropicChatProperties}, {@link AnthropicConnectionProperties}.
+ * Unit Tests for {@link AnthropicChatProperties} and
+ * {@link AnthropicConnectionProperties}.
+ *
+ * @author Soby Chacko
  */
-public class AnthropicPropertiesTests {
+class AnthropicPropertiesTests {
 
 	@Test
-	public void connectionProperties() {
-
-		new ApplicationContextRunner().withPropertyValues(
-		// @formatter:off
-					"spring.ai.anthropic.base-url=TEST_BASE_URL",
-					"spring.ai.anthropic.completions-path=message-path",
-					"spring.ai.anthropic.api-key=abc123",
-					"spring.ai.anthropic.version=6666",
-					"spring.ai.anthropic.beta-version=7777",
+	void connectionProperties() {
+		new ApplicationContextRunner()
+			.withPropertyValues("spring.ai.anthropic.base-url=TEST_BASE_URL", "spring.ai.anthropic.api-key=abc123",
 					"spring.ai.anthropic.chat.options.model=MODEL_XYZ",
 					"spring.ai.anthropic.chat.options.temperature=0.55")
-				// @formatter:on
 			.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class))
 			.run(context -> {
 				var chatProperties = context.getBean(AnthropicChatProperties.class);
@@ -52,9 +46,6 @@ public void connectionProperties() {
 
 				assertThat(connectionProperties.getApiKey()).isEqualTo("abc123");
 				assertThat(connectionProperties.getBaseUrl()).isEqualTo("TEST_BASE_URL");
-				assertThat(connectionProperties.getVersion()).isEqualTo("6666");
-				assertThat(connectionProperties.getBetaVersion()).isEqualTo("7777");
-				assertThat(connectionProperties.getCompletionsPath()).isEqualTo("message-path");
 
 				assertThat(chatProperties.getOptions().getModel()).isEqualTo("MODEL_XYZ");
 				assertThat(chatProperties.getOptions().getTemperature()).isEqualTo(0.55);
@@ -62,25 +53,37 @@ public void connectionProperties() {
 	}
 
 	@Test
-	public void chatOptionsTest() {
+	void chatOverrideConnectionProperties() {
+		new ApplicationContextRunner()
+			.withPropertyValues("spring.ai.anthropic.base-url=TEST_BASE_URL", "spring.ai.anthropic.api-key=abc123",
+					"spring.ai.anthropic.chat.base-url=TEST_BASE_URL_2", "spring.ai.anthropic.chat.api-key=456",
+					"spring.ai.anthropic.chat.options.model=MODEL_XYZ",
+					"spring.ai.anthropic.chat.options.temperature=0.55")
+			.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class))
+			.run(context -> {
+				var chatProperties = context.getBean(AnthropicChatProperties.class);
+				var connectionProperties = context.getBean(AnthropicConnectionProperties.class);
 
-		new ApplicationContextRunner().withPropertyValues(
-		// @formatter:off
-				"spring.ai.anthropic.api-key=API_KEY",
-				"spring.ai.anthropic.base-url=TEST_BASE_URL",
+				assertThat(connectionProperties.getApiKey()).isEqualTo("abc123");
+				assertThat(connectionProperties.getBaseUrl()).isEqualTo("TEST_BASE_URL");
 
-				"spring.ai.anthropic.chat.options.model=MODEL_XYZ",
-				"spring.ai.anthropic.chat.options.max-tokens=123",
-				"spring.ai.anthropic.chat.options.metadata.user-id=MyUserId",
-				"spring.ai.anthropic.chat.options.stop_sequences=boza,koza",
+				assertThat(chatProperties.getApiKey()).isEqualTo("456");
+				assertThat(chatProperties.getBaseUrl()).isEqualTo("TEST_BASE_URL_2");
 
-				"spring.ai.anthropic.chat.options.temperature=0.55",
-				"spring.ai.anthropic.chat.options.top-p=0.56",
-				"spring.ai.anthropic.chat.options.top-k=100",
+				assertThat(chatProperties.getOptions().getModel()).isEqualTo("MODEL_XYZ");
+				assertThat(chatProperties.getOptions().getTemperature()).isEqualTo(0.55);
+			});
+	}
 
-				"spring.ai.anthropic.chat.options.toolChoice={\"name\":\"toolChoiceFunctionName\",\"type\":\"tool\"}"
-				)
-			// @formatter:on
+	@Test
+	void chatOptionsTest() {
+		new ApplicationContextRunner()
+			.withPropertyValues("spring.ai.anthropic.api-key=API_KEY", "spring.ai.anthropic.base-url=TEST_BASE_URL",
+					"spring.ai.anthropic.chat.options.model=MODEL_XYZ",
+					"spring.ai.anthropic.chat.options.max-tokens=123",
+					"spring.ai.anthropic.chat.options.stop-sequences=boza,koza",
+					"spring.ai.anthropic.chat.options.temperature=0.55", "spring.ai.anthropic.chat.options.top-p=0.56",
+					"spring.ai.anthropic.chat.options.top-k=100")
 			.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class))
 			.run(context -> {
 				var chatProperties = context.getBean(AnthropicChatProperties.class);
@@ -94,20 +97,12 @@ public void chatOptionsTest() {
 				assertThat(chatProperties.getOptions().getTemperature()).isEqualTo(0.55);
 				assertThat(chatProperties.getOptions().getTopP()).isEqualTo(0.56);
 				assertThat(chatProperties.getOptions().getTopK()).isEqualTo(100);
-
-				assertThat(chatProperties.getOptions().getMetadata().userId()).isEqualTo("MyUserId");
-
-				assertThat(chatProperties.getOptions().getToolChoice()).isNotNull();
-				assertThat(chatProperties.getOptions().getToolChoice().type()).isEqualTo("tool");
-				assertThat(((ToolChoiceTool) chatProperties.getOptions().getToolChoice()).name())
-					.isEqualTo("toolChoiceFunctionName");
 			});
 	}
 
 	@Test
-	public void chatCompletionDisabled() {
-
-		// It is enabled by default
+	void chatCompletionDisabled() {
+		// Enabled by default
 		new ApplicationContextRunner().withPropertyValues("spring.ai.anthropic.api-key=API_KEY")
 			.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class))
 			.run(context -> {
@@ -115,7 +110,7 @@ public void chatCompletionDisabled() {
 				assertThat(context.getBeansOfType(AnthropicChatModel.class)).isNotEmpty();
 			});
 
-		// Explicitly enable the chat auto-configuration.
+		// Explicitly enable
 		new ApplicationContextRunner()
 			.withPropertyValues("spring.ai.anthropic.api-key=API_KEY", "spring.ai.model.chat=anthropic")
 			.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class))
@@ -124,10 +119,9 @@ public void chatCompletionDisabled() {
 				assertThat(context.getBeansOfType(AnthropicChatModel.class)).isNotEmpty();
 			});
 
-		// Explicitly disable the chat auto-configuration.
+		// Explicitly disable
 		new ApplicationContextRunner().withPropertyValues("spring.ai.model.chat=none")
-			.withConfiguration(
-					AutoConfigurations.of(RestClientAutoConfiguration.class, AnthropicChatAutoConfiguration.class))
+			.withConfiguration(AutoConfigurations.of(AnthropicChatAutoConfiguration.class))
 			.run(context -> assertThat(context.getBeansOfType(AnthropicChatModel.class)).isEmpty());
 	}
 
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/BaseAnthropicIT.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/BaseAnthropicIT.java
deleted file mode 100644
index c6f6f2d0cf3..00000000000
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/BaseAnthropicIT.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.model.anthropic.autoconfigure;
-
-import java.util.Arrays;
-import java.util.stream.Stream;
-
-import org.springframework.ai.model.tool.autoconfigure.ToolCallingAutoConfiguration;
-import org.springframework.boot.autoconfigure.AutoConfigurations;
-import org.springframework.boot.restclient.autoconfigure.RestClientAutoConfiguration;
-import org.springframework.boot.webclient.autoconfigure.WebClientAutoConfiguration;
-
-public abstract class BaseAnthropicIT {
-
-	public static AutoConfigurations anthropicAutoConfig(Class<?>... additional) {
-		Class<?>[] dependencies = { ToolCallingAutoConfiguration.class, RestClientAutoConfiguration.class,
-				WebClientAutoConfiguration.class };
-		Class<?>[] all = Stream.concat(Arrays.stream(dependencies), Arrays.stream(additional)).toArray(Class<?>[]::new);
-		return AutoConfigurations.of(all);
-	}
-
-}
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/tool/FunctionCallWithFunctionBeanIT.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/tool/FunctionCallWithFunctionBeanIT.java
index a36df341133..1d1ee29649b 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/tool/FunctionCallWithFunctionBeanIT.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/tool/FunctionCallWithFunctionBeanIT.java
@@ -19,6 +19,7 @@
 import java.util.List;
 import java.util.function.Function;
 
+import com.anthropic.models.messages.Model;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
 import org.slf4j.Logger;
@@ -26,7 +27,6 @@
 
 import org.springframework.ai.anthropic.AnthropicChatModel;
 import org.springframework.ai.anthropic.AnthropicChatOptions;
-import org.springframework.ai.anthropic.api.AnthropicApi;
 import org.springframework.ai.chat.messages.UserMessage;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.prompt.Prompt;
@@ -42,28 +42,32 @@
 
 import static org.assertj.core.api.Assertions.assertThat;
 
+/**
+ * Integration test for tool calling via Spring bean-registered function callbacks.
+ *
+ * @author Soby Chacko
+ */
 @EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".*")
 class FunctionCallWithFunctionBeanIT {
 
 	private final Logger logger = LoggerFactory.getLogger(FunctionCallWithFunctionBeanIT.class);
 
 	private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
-		.withPropertyValues("spring.ai.anthropic.apiKey=" + System.getenv("ANTHROPIC_API_KEY"))
+		.withPropertyValues("spring.ai.anthropic.api-key=" + System.getenv("ANTHROPIC_API_KEY"))
 		.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class))
 		.withUserConfiguration(Config.class);
 
 	@Test
 	void functionCallTest() {
-
 		this.contextRunner
-			.withPropertyValues(
-					"spring.ai.anthropic.chat.options.model=" + AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
+			.withPropertyValues("spring.ai.anthropic.chat.options.model=" + Model.CLAUDE_HAIKU_4_5.asString())
 			.run(context -> {
 
 				AnthropicChatModel chatModel = context.getBean(AnthropicChatModel.class);
 
 				var userMessage = new UserMessage(
-						"What's the weather like in San Francisco, in Paris, France and in Tokyo, Japan? Return the temperature in Celsius.");
+						"What's the weather like in San Francisco, in Paris, France and in Tokyo, Japan?"
+								+ " Return the temperature in Celsius.");
 
 				ChatResponse response = chatModel.call(new Prompt(List.of(userMessage),
 						AnthropicChatOptions.builder().toolNames("weatherFunction").build()));
@@ -78,22 +82,20 @@ void functionCallTest() {
 				logger.info("Response: {}", response);
 
 				assertThat(response.getResult().getOutput().getText()).contains("30", "10", "15");
-
 			});
 	}
 
 	@Test
 	void functionCallWithPortableFunctionCallingOptions() {
-
 		this.contextRunner
-			.withPropertyValues(
-					"spring.ai.anthropic.chat.options.model=" + AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
+			.withPropertyValues("spring.ai.anthropic.chat.options.model=" + Model.CLAUDE_HAIKU_4_5.asString())
 			.run(context -> {
 
 				AnthropicChatModel chatModel = context.getBean(AnthropicChatModel.class);
 
 				var userMessage = new UserMessage(
-						"What's the weather like in San Francisco, in Paris, France and in Tokyo, Japan? Return the temperature in Celsius.");
+						"What's the weather like in San Francisco, in Paris, France and in Tokyo, Japan?"
+								+ " Return the temperature in Celsius.");
 
 				ChatResponse response = chatModel.call(new Prompt(List.of(userMessage),
 						ToolCallingChatOptions.builder().toolNames("weatherFunction").build()));
@@ -113,8 +115,6 @@ public Function<Request, Response> weatherFunction() {
 			return new MockWeatherService();
 		}
 
-		// Relies on the Request's JsonClassDescription annotation to provide the
-		// function description.
 		@Bean
 		public Function<MockWeatherService.Request, MockWeatherService.Response> weatherFunction3() {
 			MockWeatherService weatherService = new MockWeatherService();
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/tool/FunctionCallWithPromptFunctionIT.java b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/tool/FunctionCallWithPromptFunctionIT.java
index d028e10cec0..64c3cb758ad 100644
--- a/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/tool/FunctionCallWithPromptFunctionIT.java
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-anthropic/src/test/java/org/springframework/ai/model/anthropic/autoconfigure/tool/FunctionCallWithPromptFunctionIT.java
@@ -25,7 +25,6 @@
 
 import org.springframework.ai.anthropic.AnthropicChatModel;
 import org.springframework.ai.anthropic.AnthropicChatOptions;
-import org.springframework.ai.anthropic.api.AnthropicApi;
 import org.springframework.ai.chat.messages.UserMessage;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.prompt.Prompt;
@@ -36,40 +35,42 @@
 
 import static org.assertj.core.api.Assertions.assertThat;
 
+/**
+ * Integration test for tool calling via prompt-level function callbacks.
+ *
+ * @author Soby Chacko
+ */
 @EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".*")
-public class FunctionCallWithPromptFunctionIT {
+class FunctionCallWithPromptFunctionIT {
 
 	private final Logger logger = LoggerFactory.getLogger(FunctionCallWithPromptFunctionIT.class);
 
 	private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
-		.withPropertyValues("spring.ai.anthropic.apiKey=" + System.getenv("ANTHROPIC_API_KEY"))
+		.withPropertyValues("spring.ai.anthropic.api-key=" + System.getenv("ANTHROPIC_API_KEY"))
 		.withConfiguration(SpringAiTestAutoConfigurations.of(AnthropicChatAutoConfiguration.class));
 
 	@Test
 	void functionCallTest() {
-		this.contextRunner
-			.withPropertyValues(
-					"spring.ai.anthropic.chat.options.model=" + AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
-			.run(context -> {
+		this.contextRunner.run(context -> {
 
-				AnthropicChatModel chatModel = context.getBean(AnthropicChatModel.class);
+			AnthropicChatModel chatModel = context.getBean(AnthropicChatModel.class);
 
-				UserMessage userMessage = new UserMessage(
-						"What's the weather like in San Francisco, in Paris and in Tokyo? Return the temperature in Celsius.");
+			UserMessage userMessage = new UserMessage("What's the weather like in San Francisco, in Paris and in Tokyo?"
+					+ " Return the temperature in Celsius.");
 
-				var promptOptions = AnthropicChatOptions.builder()
-					.toolCallbacks(List.of(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
-						.description("Get the weather in location. Return temperature in 36°F or 36°C format.")
-						.inputType(MockWeatherService.Request.class)
-						.build()))
-					.build();
+			var promptOptions = AnthropicChatOptions.builder()
+				.toolCallbacks(List.of(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
+					.description("Get the weather in location. Return temperature in 36°F or 36°C format.")
+					.inputType(MockWeatherService.Request.class)
+					.build()))
+				.build();
 
-				ChatResponse response = chatModel.call(new Prompt(List.of(userMessage), promptOptions));
+			ChatResponse response = chatModel.call(new Prompt(List.of(userMessage), promptOptions));
 
-				logger.info("Response: {}", response);
+			logger.info("Response: {}", response);
 
-				assertThat(response.getResult().getOutput().getText()).contains("30", "10", "15");
-			});
+			assertThat(response.getResult().getOutput().getText()).contains("30", "10", "15");
+		});
 	}
 
 }
diff --git a/models/spring-ai-anthropic/README.md b/models/spring-ai-anthropic/README.md
index 5c62eb9c47a..a04992fab61 100644
--- a/models/spring-ai-anthropic/README.md
+++ b/models/spring-ai-anthropic/README.md
@@ -1,2 +1,282 @@
-[Anthropic Chat Documentation](https://docs.spring.io/spring-ai/reference/api/chat/anthropic-chat.html)
+# Anthropic Java SDK Integration
 
+This module integrates the official Anthropic Java SDK with Spring AI, providing access to Claude models through Anthropic's API.
+
+[Anthropic Java SDK GitHub repository](https://github.com/anthropics/anthropic-sdk-java)
+
+## Authentication
+
+Configure your Anthropic API key either programmatically or via environment variable:
+
+```java
+AnthropicChatOptions options = AnthropicChatOptions.builder()
+    .apiKey("<your-api-key>")
+    .build();
+```
+
+Or using the environment variable (automatically detected):
+
+```bash
+export ANTHROPIC_API_KEY=<your-api-key>
+```
+
+## Features
+
+This module supports:
+
+- **Chat Completions** - Synchronous and streaming responses
+- **Tool Calling** - Function calling with automatic tool execution
+- **Streaming Tool Calling** - Tool calls in streaming mode with partial JSON accumulation
+- **Multi-Modal** - Images and PDF documents
+- **Extended Thinking** - Claude's thinking/reasoning feature with full streaming support
+- **Citations** - Document-grounded responses with source attribution
+- **Prompt Caching** - Reduce costs for repeated context with configurable strategies
+- **Structured Output** - JSON schema-constrained responses with effort control
+- **Per-Request HTTP Headers** - Custom headers per API call for tracking, beta features, and routing
+- **Observability** - Micrometer-based metrics and tracing
+
+### Planned Features
+
+- **Amazon Bedrock** - Access Claude through AWS Bedrock
+- **Google Vertex AI** - Access Claude through Google Cloud
+
+## Basic Usage
+
+```java
+// Create chat model with default options
+AnthropicChatModel chatModel = new AnthropicChatModel(
+    AnthropicChatOptions.builder()
+        .model("claude-sonnet-4-20250514")
+        .maxTokens(1024)
+        .build()
+);
+
+// Synchronous call
+ChatResponse response = chatModel.call(new Prompt("Hello, Claude!"));
+
+// Streaming call
+Flux<ChatResponse> stream = chatModel.stream(new Prompt("Tell me a story"));
+```
+
+## Tool Calling
+
+```java
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .toolCallbacks(FunctionToolCallback.builder("getWeather", new WeatherService())
+        .description("Get the current weather for a location")
+        .inputType(WeatherRequest.class)
+        .build())
+    .build();
+
+ChatResponse response = chatModel.call(new Prompt("What's the weather in Paris?", options));
+```
+
+## Extended Thinking
+
+Enable Claude's reasoning feature to see step-by-step thinking before the final answer:
+
+```java
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .temperature(1.0) // required when thinking is enabled
+    .maxTokens(16000)
+    .thinkingEnabled(10000L) // budget must be >= 1024 and < maxTokens
+    .build();
+
+ChatResponse response = chatModel.call(new Prompt("Solve this step by step...", options));
+```
+
+Three thinking modes are available via convenience builders:
+- `thinkingEnabled(budgetTokens)` - Enable with a specific token budget
+- `thinkingAdaptive()` - Let Claude decide whether to think
+- `thinkingDisabled()` - Explicitly disable thinking
+
+Thinking is fully supported in both synchronous and streaming modes, including signature capture for thinking block verification.
+
+## Citations
+
+Anthropic's Citations API allows Claude to reference specific parts of provided documents when generating responses. Three document types are supported: plain text, PDF, and custom content blocks.
+
+```java
+// Create a citation document
+AnthropicCitationDocument document = AnthropicCitationDocument.builder()
+    .plainText("The Eiffel Tower was completed in 1889 in Paris, France. " +
+               "It stands 330 meters tall and was designed by Gustave Eiffel.")
+    .title("Eiffel Tower Facts")
+    .citationsEnabled(true)
+    .build();
+
+// Call the model with the document
+ChatResponse response = chatModel.call(
+    new Prompt(
+        "When was the Eiffel Tower built?",
+        AnthropicChatOptions.builder()
+            .model("claude-sonnet-4-20250514")
+            .maxTokens(1024)
+            .citationDocuments(document)
+            .build()
+    )
+);
+
+// Access citations from response metadata
+List<Citation> citations = (List<Citation>) response.getMetadata().get("citations");
+for (Citation citation : citations) {
+    System.out.println("Document: " + citation.getDocumentTitle());
+    System.out.println("Cited text: " + citation.getCitedText());
+}
+```
+
+PDF and custom content block documents are also supported via `pdfFile()`, `pdf()`, and `customContent()` builders.
+
+## Prompt Caching
+
+Prompt caching reduces costs and latency by caching repeated context (system prompts, tool definitions, conversation history) across API calls. Five caching strategies are available:
+
+| Strategy | Description |
+|----------|-------------|
+| `NONE` | No caching (default) |
+| `SYSTEM_ONLY` | Cache system message content |
+| `TOOLS_ONLY` | Cache tool definitions |
+| `SYSTEM_AND_TOOLS` | Cache both system messages and tool definitions |
+| `CONVERSATION_HISTORY` | Cache system messages, tools, and conversation messages |
+
+```java
+// Cache system messages and tool definitions to reduce costs for repeated prompts
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .maxTokens(1024)
+    .cacheOptions(AnthropicCacheOptions.builder()
+        .strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
+        .build())
+    .build();
+
+ChatResponse response = chatModel.call(
+    new Prompt(List.of(
+        new SystemMessage("You are an expert assistant with deep domain knowledge..."),
+        new UserMessage("What is the capital of France?")),
+        options));
+
+// Access cache token usage via native SDK usage
+com.anthropic.models.messages.Usage sdkUsage =
+    (com.anthropic.models.messages.Usage) response.getMetadata().getUsage().getNativeUsage();
+long cacheCreation = sdkUsage.cacheCreationInputTokens().orElse(0L);
+long cacheRead = sdkUsage.cacheReadInputTokens().orElse(0L);
+```
+
+You can also configure TTL (5 minutes or 1 hour), minimum content length thresholds, and multi-block system caching for static vs. dynamic system message segments:
+
+```java
+var options = AnthropicCacheOptions.builder()
+    .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+    .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
+    .messageTypeMinContentLength(MessageType.SYSTEM, 100)
+    .multiBlockSystemCaching(true)
+    .build();
+```
+
+## Structured Output
+
+Structured output constrains Claude to produce responses conforming to a JSON schema. This module also supports Anthropic's effort control for tuning response quality vs. speed.
+
+> **Model Requirement:** Structured output and effort control require `claude-sonnet-4-6` or newer. Older models like `claude-sonnet-4-20250514` do not support these features.
+
+### JSON Schema Output
+
+```java
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-6")
+    .outputSchema("""
+        {
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "capital": {"type": "string"},
+                "population": {"type": "integer"}
+            },
+            "required": ["name", "capital"],
+            "additionalProperties": false
+        }
+        """)
+    .build();
+
+ChatResponse response = chatModel.call(new Prompt("Tell me about France.", options));
+// Response text will be valid JSON conforming to the schema
+```
+
+### Effort Control
+
+Control how much compute Claude spends on its response. Lower effort means faster, cheaper responses; higher effort means more thorough reasoning.
+
+```java
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-6")
+    .effort(OutputConfig.Effort.LOW) // LOW, MEDIUM, HIGH, or MAX
+    .build();
+```
+
+### Combined Schema + Effort
+
+```java
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-6")
+    .outputSchema("{\"type\":\"object\",\"properties\":{\"answer\":{\"type\":\"integer\"}},\"required\":[\"answer\"],\"additionalProperties\":false}")
+    .effort(OutputConfig.Effort.HIGH)
+    .build();
+```
+
+### Direct OutputConfig
+
+For full control, use the SDK's `OutputConfig` directly:
+
+```java
+import com.anthropic.models.messages.OutputConfig;
+import com.anthropic.models.messages.JsonOutputFormat;
+import com.anthropic.core.JsonValue;
+
+var outputConfig = OutputConfig.builder()
+    .effort(OutputConfig.Effort.HIGH)
+    .format(JsonOutputFormat.builder()
+        .schema(JsonOutputFormat.Schema.builder()
+            .putAdditionalProperty("type", JsonValue.from("object"))
+            .putAdditionalProperty("properties", JsonValue.from(Map.of(
+                "name", Map.of("type", "string"))))
+            .putAdditionalProperty("additionalProperties", JsonValue.from(false))
+            .build())
+        .build())
+    .build();
+
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-6")
+    .outputConfig(outputConfig)
+    .build();
+```
+
+## Per-Request HTTP Headers
+
+Add custom HTTP headers to individual API calls. Unlike `customHeaders` (which apply to all requests at the client level), `httpHeaders` are set per request.
+
+```java
+var options = AnthropicChatOptions.builder()
+    .httpHeaders(Map.of(
+        "X-Request-Id", "req-12345",
+        "X-Custom-Tracking", "my-value"))
+    .build();
+
+ChatResponse response = chatModel.call(new Prompt("Hello", options));
+```
+
+## Logging
+
+Enable SDK logging by setting the environment variable:
+
+```bash
+export ANTHROPIC_LOG=debug
+```
+
+## Documentation
+
+For comprehensive documentation, see:
+- [Spring AI Anthropic Reference Documentation](https://docs.spring.io/spring-ai/reference/api/chat/anthropic-chat.html)
+- [Anthropic API Documentation](https://docs.anthropic.com/)
+- [Anthropic Java SDK Documentation](https://github.com/anthropics/anthropic-sdk-java)
diff --git a/models/spring-ai-anthropic/pom.xml b/models/spring-ai-anthropic/pom.xml
index 09e5176ca57..1d54e7a0961 100644
--- a/models/spring-ai-anthropic/pom.xml
+++ b/models/spring-ai-anthropic/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  ~ Copyright 2023-present the original author or authors.
+  ~ Copyright 2023-present the original author or authors.
   ~
   ~ Licensed under the Apache License, Version 2.0 (the "License");
   ~ you may not use this file except in compliance with the License.
@@ -15,9 +15,7 @@
   ~ limitations under the License.
   -->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 	<parent>
 		<groupId>org.springframework.ai</groupId>
@@ -37,9 +35,6 @@
 		<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
 	</scm>
 
-	<properties>
-	</properties>
-
 	<dependencies>
 
 		<!-- production dependencies -->
@@ -50,36 +45,9 @@
 		</dependency>
 
 		<dependency>
-			<groupId>org.springframework.ai</groupId>
-			<artifactId>spring-ai-retry</artifactId>
-			<version>${project.parent.version}</version>
-		</dependency>
-
-		<dependency>
-			<groupId>io.rest-assured</groupId>
-			<artifactId>json-path</artifactId>
-		</dependency>
-
-		<dependency>
-			<groupId>com.github.victools</groupId>
-			<artifactId>jsonschema-generator</artifactId>
-			<version>${jsonschema.version}</version>
-		</dependency>
-
-		<dependency>
-			<groupId>com.github.victools</groupId>
-			<artifactId>jsonschema-module-jackson</artifactId>
-			<version>${jsonschema.version}</version>
-		</dependency>
-
-		<dependency>
-			<groupId>org.springframework</groupId>
-			<artifactId>spring-context-support</artifactId>
-		</dependency>
-
-		<dependency>
-			<groupId>org.springframework</groupId>
-			<artifactId>spring-webflux</artifactId>
+			<groupId>com.anthropic</groupId>
+			<artifactId>anthropic-java</artifactId>
+			<version>${anthropic-sdk.version}</version>
 		</dependency>
 
 		<dependency>
@@ -96,21 +64,14 @@
 		</dependency>
 
 		<dependency>
-			<groupId>io.micrometer</groupId>
-			<artifactId>micrometer-observation-test</artifactId>
+			<groupId>org.springframework.boot</groupId>
+			<artifactId>spring-boot-starter-test</artifactId>
 			<scope>test</scope>
 		</dependency>
 
-	<dependency>
-		<groupId>tools.jackson.dataformat</groupId>
-		<artifactId>jackson-dataformat-xml</artifactId>
-		<scope>test</scope>
-	</dependency>
-
 		<dependency>
-			<groupId>net.javacrumbs.json-unit</groupId>
-			<artifactId>json-unit-assertj</artifactId>
-			<version>${json-unit-assertj.version}</version>
+			<groupId>io.micrometer</groupId>
+			<artifactId>micrometer-observation-test</artifactId>
 			<scope>test</scope>
 		</dependency>
 	</dependencies>
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AbstractAnthropicOptions.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AbstractAnthropicOptions.java
new file mode 100644
index 00000000000..152f49dde95
--- /dev/null
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AbstractAnthropicOptions.java
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2023-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic;
+
+import java.net.Proxy;
+import java.time.Duration;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.jspecify.annotations.Nullable;
+
+/**
+ * Base class for common Anthropic SDK configuration options, extended by
+ * {@link AnthropicChatOptions}.
+ *
+ * <p>
+ * This class only stores the values. When the API key or base URL is unset, the
+ * {@code ANTHROPIC_API_KEY} / {@code ANTHROPIC_BASE_URL} environment variables apply.
+ *
+ * @author Soby Chacko
+ * @since 2.0.0
+ * @see AnthropicChatOptions
+ */
+public class AbstractAnthropicOptions {
+
+	/**
+	 * The base URL to connect to the Anthropic API. Defaults to
+	 * "https://api.anthropic.com" if not specified.
+	 */
+	private @Nullable String baseUrl;
+
+	/**
+	 * The API key to authenticate with the Anthropic API. Can also be set via the
+	 * ANTHROPIC_API_KEY environment variable.
+	 */
+	private @Nullable String apiKey;
+
+	/**
+	 * The model name to use for requests.
+	 */
+	private @Nullable String model;
+
+	/**
+	 * Request timeout for the Anthropic client. Defaults to 60 seconds if not specified.
+	 */
+	private @Nullable Duration timeout;
+
+	/**
+	 * Maximum number of retries for failed requests. Defaults to 2 if not specified.
+	 */
+	private @Nullable Integer maxRetries;
+
+	/**
+	 * Proxy settings for the Anthropic client.
+	 */
+	private @Nullable Proxy proxy;
+
+	/**
+	 * Custom HTTP headers sent with every request made through the Anthropic client.
+	 */
+	private Map<String, String> customHeaders = new HashMap<>();
+
+	public @Nullable String getBaseUrl() {
+		return this.baseUrl;
+	}
+
+	public void setBaseUrl(@Nullable String baseUrl) {
+		this.baseUrl = baseUrl;
+	}
+
+	public @Nullable String getApiKey() {
+		return this.apiKey;
+	}
+
+	public void setApiKey(@Nullable String apiKey) {
+		this.apiKey = apiKey;
+	}
+
+	public @Nullable String getModel() {
+		return this.model;
+	}
+
+	public void setModel(@Nullable String model) {
+		this.model = model;
+	}
+
+	public @Nullable Duration getTimeout() {
+		return this.timeout;
+	}
+
+	public void setTimeout(@Nullable Duration timeout) {
+		this.timeout = timeout;
+	}
+
+	public @Nullable Integer getMaxRetries() {
+		return this.maxRetries;
+	}
+
+	public void setMaxRetries(@Nullable Integer maxRetries) {
+		this.maxRetries = maxRetries;
+	}
+
+	public @Nullable Proxy getProxy() {
+		return this.proxy;
+	}
+
+	public void setProxy(@Nullable Proxy proxy) {
+		this.proxy = proxy;
+	}
+
+	public Map<String, String> getCustomHeaders() {
+		return this.customHeaders;
+	}
+
+	public void setCustomHeaders(Map<String, String> customHeaders) {
+		this.customHeaders = customHeaders;
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheOptions.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheOptions.java
similarity index 73%
rename from models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheOptions.java
rename to models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheOptions.java
index a06baf5d4b3..d062babdc89 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheOptions.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheOptions.java
@@ -14,10 +14,11 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic.api;
+package org.springframework.ai.anthropic;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Objects;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -27,50 +28,37 @@
 import org.springframework.ai.chat.messages.MessageType;
 
 /**
- * Anthropic cache options for configuring prompt caching behavior.
+ * Anthropic cache options for configuring prompt caching behavior with the Anthropic Java
+ * SDK.
  *
  * @author Austin Dase
+ * @author Soby Chacko
  * @since 1.1.0
- **/
+ */
 public class AnthropicCacheOptions {
 
-	public static AnthropicCacheOptions DISABLED = new AnthropicCacheOptions();
+	/**
+	 * Returns a new disabled cache options instance with strategy {@code NONE}. Each call
+	 * returns a fresh instance to avoid shared mutable state.
+	 */
+	public static AnthropicCacheOptions disabled() {
+		return new AnthropicCacheOptions();
+	}
 
 	private static final int DEFAULT_MIN_CONTENT_LENGTH = 1;
 
 	private AnthropicCacheStrategy strategy = AnthropicCacheStrategy.NONE;
 
-	/**
-	 * Function to determine the content length of a message. Defaults to the length of
-	 * the string, or {@code 0} if the string is {@code null}. This is used as a proxy for
-	 * number of tokens because Anthropic does document that messages with too few tokens
-	 * are not eligible for caching - see <a href=
-	 * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#cache-limitations">Anthropic
-	 * Caching Limitations</a>. Further, the function can be customized to use a more
-	 * accurate token count if desired.
-	 */
 	private Function<@Nullable String, Integer> contentLengthFunction = s -> s != null ? s.length() : 0;
 
-	/**
-	 * Configure on a per {@link MessageType} basis the TTL (time-to-live) for cached
-	 * prompts. Defaults to {@link AnthropicCacheTtl#FIVE_MINUTES}. Note that different
-	 * caches have different write costs, see <a href=
-	 * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#understanding-cache-breakpoint-costs">Anthropic
-	 * Cache Breakpoint Costs</a>
-	 */
 	private Map<MessageType, AnthropicCacheTtl> messageTypeTtl = Stream.of(MessageType.values())
 		.collect(Collectors.toMap(mt -> mt, mt -> AnthropicCacheTtl.FIVE_MINUTES, (m1, m2) -> m1, HashMap::new));
 
-	/**
-	 * Configure on a per {@link MessageType} basis the minimum content length required to
-	 * consider a message for caching. Defaults to {@code 1}. This is used in conjunction
-	 * with the {@link #contentLengthFunction} to determine if a message is eligible for
-	 * caching based on its content length. Helping to optimize the usage of the limited
-	 * cache breakpoints (4 max) allowed by Anthropic.
-	 */
 	private Map<MessageType, Integer> messageTypeMinContentLengths = Stream.of(MessageType.values())
 		.collect(Collectors.toMap(mt -> mt, mt -> DEFAULT_MIN_CONTENT_LENGTH, (m1, m2) -> m1, HashMap::new));
 
+	private boolean multiBlockSystemCaching = false;
+
 	public static Builder builder() {
 		return new Builder();
 	}
@@ -107,11 +95,39 @@ public void setMessageTypeMinContentLengths(Map<MessageType, Integer> messageTyp
 		this.messageTypeMinContentLengths = messageTypeMinContentLengths;
 	}
 
+	public boolean isMultiBlockSystemCaching() {
+		return this.multiBlockSystemCaching;
+	}
+
+	public void setMultiBlockSystemCaching(boolean multiBlockSystemCaching) {
+		this.multiBlockSystemCaching = multiBlockSystemCaching;
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		if (this == o) {
+			return true;
+		}
+		if (!(o instanceof AnthropicCacheOptions that)) {
+			return false;
+		}
+		return this.multiBlockSystemCaching == that.multiBlockSystemCaching && this.strategy == that.strategy
+				&& Objects.equals(this.messageTypeTtl, that.messageTypeTtl)
+				&& Objects.equals(this.messageTypeMinContentLengths, that.messageTypeMinContentLengths);
+	}
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(this.strategy, this.messageTypeTtl, this.messageTypeMinContentLengths,
+				this.multiBlockSystemCaching);
+	}
+
 	@Override
 	public String toString() {
 		return "AnthropicCacheOptions{" + "strategy=" + this.strategy + ", contentLengthFunction="
 				+ this.contentLengthFunction + ", messageTypeTtl=" + this.messageTypeTtl
-				+ ", messageTypeMinContentLengths=" + this.messageTypeMinContentLengths + '}';
+				+ ", messageTypeMinContentLengths=" + this.messageTypeMinContentLengths + ", multiBlockSystemCaching="
+				+ this.multiBlockSystemCaching + '}';
 	}
 
 	public static final class Builder {
@@ -148,6 +164,11 @@ public Builder messageTypeMinContentLength(MessageType messageType, Integer minC
 			return this;
 		}
 
+		public Builder multiBlockSystemCaching(boolean multiBlockSystemCaching) {
+			this.options.setMultiBlockSystemCaching(multiBlockSystemCaching);
+			return this;
+		}
+
 		public AnthropicCacheOptions build() {
 			return this.options;
 		}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheStrategy.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheStrategy.java
new file mode 100644
index 00000000000..2c675e7c3d0
--- /dev/null
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheStrategy.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2023-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic;
+
+/**
+ * Defines the caching strategy for Anthropic prompt caching. Anthropic allows up to 4
+ * cache breakpoints per request, and the cache hierarchy follows the order:
+ * {@code tools -> system -> messages}.
+ *
+ * @author Mark Pollack
+ * @author Soby Chacko
+ * @since 1.1.0
+ */
+public enum AnthropicCacheStrategy {
+
+	/**
+	 * No caching (default behavior). All content is processed fresh on each request.
+	 */
+	NONE,
+
+	/**
+	 * Cache tool definitions only. Places a cache breakpoint on the last tool, while
+	 * system messages and conversation history remain uncached.
+	 */
+	TOOLS_ONLY,
+
+	/**
+	 * Cache system instructions only. Places a cache breakpoint on the system message
+	 * content. Tools are cached implicitly via Anthropic's automatic lookback mechanism.
+	 */
+	SYSTEM_ONLY,
+
+	/**
+	 * Cache system instructions and tool definitions. Places cache breakpoints on the
+	 * last tool (breakpoint 1) and system message content (breakpoint 2).
+	 */
+	SYSTEM_AND_TOOLS,
+
+	/**
+	 * Cache the entire conversation history up to (but not including) the current user
+	 * question. Places a cache breakpoint on the last user message in the conversation
+	 * history, enabling incremental caching as the conversation grows.
+	 */
+	CONVERSATION_HISTORY
+
+}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheTtl.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheTtl.java
similarity index 62%
rename from models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheTtl.java
rename to models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheTtl.java
index 21c80b5b1e6..06a61170cda 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheTtl.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCacheTtl.java
@@ -14,29 +14,35 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic.api;
+package org.springframework.ai.anthropic;
+
+import com.anthropic.models.messages.CacheControlEphemeral;
 
 /**
- * Anthropic cache TTL (time-to-live) options for specifying how long cached prompts See
- * the Anthropic documentation for more details: <a href=
- * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration">Anthropic
- * Prompt Caching</a>
+ * Anthropic cache TTL (time-to-live) options for specifying how long cached prompts
+ * remain valid. Wraps the SDK's {@link CacheControlEphemeral.Ttl} enum values.
  *
  * @author Austin Dase
+ * @author Soby Chacko
  * @since 1.1.0
- **/
+ * @see <a href=
+ * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration">Anthropic
+ * Prompt Caching</a>
+ */
 public enum AnthropicCacheTtl {
 
-	FIVE_MINUTES("5m"), ONE_HOUR("1h");
+	FIVE_MINUTES(CacheControlEphemeral.Ttl.TTL_5M),
+
+	ONE_HOUR(CacheControlEphemeral.Ttl.TTL_1H);
 
-	private final String value;
+	private final CacheControlEphemeral.Ttl sdkTtl;
 
-	AnthropicCacheTtl(String value) {
-		this.value = value;
+	AnthropicCacheTtl(CacheControlEphemeral.Ttl sdkTtl) {
+		this.sdkTtl = sdkTtl;
 	}
 
-	public String getValue() {
-		return this.value;
+	public CacheControlEphemeral.Ttl getSdkTtl() {
+		return this.sdkTtl;
 	}
 
 }
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java
index 8f280b861fb..ddd9ee44f78 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java
@@ -22,9 +22,41 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
-import java.util.Set;
-import java.util.stream.Collectors;
-
+import java.util.Optional;
+import java.util.concurrent.atomic.AtomicReference;
+
+import com.anthropic.client.AnthropicClient;
+import com.anthropic.client.AnthropicClientAsync;
+import com.anthropic.core.JsonValue;
+import com.anthropic.models.messages.Base64ImageSource;
+import com.anthropic.models.messages.Base64PdfSource;
+import com.anthropic.models.messages.CacheControlEphemeral;
+import com.anthropic.models.messages.CitationCharLocation;
+import com.anthropic.models.messages.CitationContentBlockLocation;
+import com.anthropic.models.messages.CitationPageLocation;
+import com.anthropic.models.messages.CitationsDelta;
+import com.anthropic.models.messages.CodeExecutionTool20260120;
+import com.anthropic.models.messages.ContentBlock;
+import com.anthropic.models.messages.ContentBlockParam;
+import com.anthropic.models.messages.DocumentBlockParam;
+import com.anthropic.models.messages.ImageBlockParam;
+import com.anthropic.models.messages.Message;
+import com.anthropic.models.messages.MessageCreateParams;
+import com.anthropic.models.messages.RawMessageStreamEvent;
+import com.anthropic.models.messages.RedactedThinkingBlock;
+import com.anthropic.models.messages.TextBlock;
+import com.anthropic.models.messages.TextBlockParam;
+import com.anthropic.models.messages.TextCitation;
+import com.anthropic.models.messages.ThinkingBlock;
+import com.anthropic.models.messages.Tool;
+import com.anthropic.models.messages.ToolChoice;
+import com.anthropic.models.messages.ToolChoiceAuto;
+import com.anthropic.models.messages.ToolResultBlockParam;
+import com.anthropic.models.messages.ToolUnion;
+import com.anthropic.models.messages.ToolUseBlock;
+import com.anthropic.models.messages.ToolUseBlockParam;
+import com.anthropic.models.messages.UrlImageSource;
+import com.anthropic.models.messages.UrlPdfSource;
 import io.micrometer.observation.Observation;
 import io.micrometer.observation.ObservationRegistry;
 import io.micrometer.observation.contextpropagation.ObservationThreadLocalAccessor;
@@ -32,24 +64,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import reactor.core.publisher.Flux;
-import reactor.core.publisher.Mono;
-import reactor.core.scheduler.Schedulers;
-import tools.jackson.core.type.TypeReference;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicMessage;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock.Source;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock.Type;
-import org.springframework.ai.anthropic.api.AnthropicApi.Role;
-import org.springframework.ai.anthropic.api.AnthropicCacheOptions;
-import org.springframework.ai.anthropic.api.AnthropicCacheTtl;
-import org.springframework.ai.anthropic.api.CitationDocument;
-import org.springframework.ai.anthropic.api.utils.CacheEligibilityResolver;
+
 import org.springframework.ai.chat.messages.AssistantMessage;
-import org.springframework.ai.chat.messages.Message;
+import org.springframework.ai.chat.messages.AssistantMessage.ToolCall;
 import org.springframework.ai.chat.messages.MessageType;
 import org.springframework.ai.chat.messages.ToolResponseMessage;
 import org.springframework.ai.chat.messages.UserMessage;
@@ -62,6 +79,7 @@
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.model.Generation;
 import org.springframework.ai.chat.model.MessageAggregator;
+import org.springframework.ai.chat.model.StreamingChatModel;
 import org.springframework.ai.chat.observation.ChatModelObservationContext;
 import org.springframework.ai.chat.observation.ChatModelObservationConvention;
 import org.springframework.ai.chat.observation.ChatModelObservationDocumentation;
@@ -75,20 +93,21 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
-import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
-import org.springframework.ai.retry.RetryUtils;
+import org.springframework.ai.observation.conventions.AiProvider;
 import org.springframework.ai.support.UsageCalculator;
 import org.springframework.ai.tool.definition.ToolDefinition;
-import org.springframework.ai.util.json.JsonParser;
-import org.springframework.core.retry.RetryTemplate;
-import org.springframework.http.HttpHeaders;
-import org.springframework.http.ResponseEntity;
 import org.springframework.util.Assert;
 import org.springframework.util.CollectionUtils;
-import org.springframework.util.StringUtils;
+import org.springframework.util.MimeType;
 
 /**
- * The {@link ChatModel} implementation for the Anthropic service.
+ * {@link ChatModel} and {@link StreamingChatModel} implementation using the official
+ * <a href="https://github.com/anthropics/anthropic-sdk-java">Anthropic Java SDK</a>.
+ *
+ * <p>
+ * Supports synchronous and streaming completions, tool calling, and Micrometer-based
+ * observability. API credentials are auto-detected from {@code ANTHROPIC_API_KEY} if not
+ * configured.
  *
  * @author Christian Tzolov
  * @author luocongqiu
@@ -100,157 +119,142 @@
  * @author Soby Chacko
  * @author Austin Dase
  * @since 1.0.0
+ * @see AnthropicChatOptions
+ * @see <a href="https://docs.anthropic.com/en/api/messages">Anthropic Messages API</a>
  */
-public class AnthropicChatModel implements ChatModel {
+public final class AnthropicChatModel implements ChatModel, StreamingChatModel {
 
-	public static final String DEFAULT_MODEL_NAME = AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue();
+	private static final Logger logger = LoggerFactory.getLogger(AnthropicChatModel.class);
 
-	public static final Integer DEFAULT_MAX_TOKENS = 500;
+	private static final String DEFAULT_MODEL = AnthropicChatOptions.DEFAULT_MODEL;
 
-	private static final Logger logger = LoggerFactory.getLogger(AnthropicChatModel.class);
+	private static final Integer DEFAULT_MAX_TOKENS = AnthropicChatOptions.DEFAULT_MAX_TOKENS;
 
 	private static final ChatModelObservationConvention DEFAULT_OBSERVATION_CONVENTION = new DefaultChatModelObservationConvention();
 
+	private static final String BETA_SKILLS = "skills-2025-10-02";
+
+	private static final String BETA_CODE_EXECUTION = "code-execution-2025-08-25";
+
+	private static final String BETA_FILES_API = "files-api-2025-04-14";
+
 	private static final ToolCallingManager DEFAULT_TOOL_CALLING_MANAGER = ToolCallingManager.builder().build();
 
-	/**
-	 * The retry template used to retry the OpenAI API calls.
-	 */
-	public final RetryTemplate retryTemplate;
+	private final AnthropicClient anthropicClient;
 
-	/**
-	 * The lower-level API for the Anthropic service.
-	 */
-	private final AnthropicApi anthropicApi;
+	private final AnthropicClientAsync anthropicClientAsync;
 
-	/**
-	 * The default options used for the chat completion requests.
-	 */
-	private final AnthropicChatOptions defaultOptions;
+	private final AnthropicChatOptions options;
 
-	/**
-	 * Observation registry used for instrumentation.
-	 */
 	private final ObservationRegistry observationRegistry;
 
 	private final ToolCallingManager toolCallingManager;
 
-	/**
-	 * The tool execution eligibility predicate used to determine if a tool can be
-	 * executed.
-	 */
 	private final ToolExecutionEligibilityPredicate toolExecutionEligibilityPredicate;
 
-	/**
-	 * Conventions to use for generating observations.
-	 */
 	private ChatModelObservationConvention observationConvention = DEFAULT_OBSERVATION_CONVENTION;
 
-	public AnthropicChatModel(AnthropicApi anthropicApi, AnthropicChatOptions defaultOptions,
-			ToolCallingManager toolCallingManager, RetryTemplate retryTemplate,
-			ObservationRegistry observationRegistry) {
-		this(anthropicApi, defaultOptions, toolCallingManager, retryTemplate, observationRegistry,
-				new DefaultToolExecutionEligibilityPredicate());
-	}
-
-	public AnthropicChatModel(AnthropicApi anthropicApi, AnthropicChatOptions defaultOptions,
-			ToolCallingManager toolCallingManager, RetryTemplate retryTemplate, ObservationRegistry observationRegistry,
-			ToolExecutionEligibilityPredicate toolExecutionEligibilityPredicate) {
-
-		Assert.notNull(anthropicApi, "anthropicApi cannot be null");
-		Assert.notNull(defaultOptions, "defaultOptions cannot be null");
-		Assert.notNull(toolCallingManager, "toolCallingManager cannot be null");
-		Assert.notNull(retryTemplate, "retryTemplate cannot be null");
-		Assert.notNull(observationRegistry, "observationRegistry cannot be null");
-		Assert.notNull(toolExecutionEligibilityPredicate, "toolExecutionEligibilityPredicate cannot be null");
-
-		this.anthropicApi = anthropicApi;
-		this.defaultOptions = defaultOptions;
-		this.toolCallingManager = toolCallingManager;
-		this.retryTemplate = retryTemplate;
-		this.observationRegistry = observationRegistry;
-		this.toolExecutionEligibilityPredicate = toolExecutionEligibilityPredicate;
-	}
-
-	@Override
-	public ChatResponse call(Prompt prompt) {
-		// Before moving any further, build the final request Prompt,
-		// merging runtime and default options.
-		Prompt requestPrompt = buildRequestPrompt(prompt);
-		return this.internalCall(requestPrompt, null);
+	/**
+	 * Creates a new builder for {@link AnthropicChatModel}.
+	 * @return a new builder instance
+	 */
+	public static Builder builder() {
+		return new Builder();
 	}
 
-	private ChatResponse internalCall(Prompt prompt, @Nullable ChatResponse previousChatResponse) {
-		ChatCompletionRequest request = createRequest(prompt, false);
-
-		ChatModelObservationContext observationContext = ChatModelObservationContext.builder()
-			.prompt(prompt)
-			.provider(AnthropicApi.PROVIDER_NAME)
-			.build();
-
-		ChatResponse response = ChatModelObservationDocumentation.CHAT_MODEL_OPERATION
-			.observation(this.observationConvention, DEFAULT_OBSERVATION_CONVENTION, () -> observationContext,
-					this.observationRegistry)
-			.observe(() -> {
+	/**
+	 * Private constructor - use {@link #builder()} to create instances.
+	 */
+	private AnthropicChatModel(@Nullable AnthropicClient anthropicClient,
+			@Nullable AnthropicClientAsync anthropicClientAsync, @Nullable AnthropicChatOptions options,
+			@Nullable ToolCallingManager toolCallingManager, @Nullable ObservationRegistry observationRegistry,
+			@Nullable ToolExecutionEligibilityPredicate toolExecutionEligibilityPredicate) {
 
-				ResponseEntity<ChatCompletionResponse> completionEntity = RetryUtils.execute(this.retryTemplate,
-						() -> this.anthropicApi.chatCompletionEntity(request, this.getAdditionalHttpHeaders(prompt)));
+		if (options == null) {
+			this.options = AnthropicChatOptions.builder().model(DEFAULT_MODEL).maxTokens(DEFAULT_MAX_TOKENS).build();
+		}
+		else {
+			this.options = options;
+		}
 
-				AnthropicApi.ChatCompletionResponse completionResponse = Objects
-					.requireNonNull(completionEntity.getBody());
+		this.anthropicClient = Objects.requireNonNullElseGet(anthropicClient,
+				() -> AnthropicSetup.setupSyncClient(this.options.getBaseUrl(), this.options.getApiKey(),
+						this.options.getTimeout(), this.options.getMaxRetries(), this.options.getProxy(),
+						this.options.getCustomHeaders()));
 
-				AnthropicApi.Usage usage = completionResponse.usage();
-				Usage currentChatResponseUsage = usage != null ? this.getDefaultUsage(usage) : new EmptyUsage();
-				Usage accumulatedUsage = UsageCalculator.getCumulativeUsage(currentChatResponseUsage,
-						previousChatResponse);
+		this.anthropicClientAsync = Objects.requireNonNullElseGet(anthropicClientAsync,
+				() -> AnthropicSetup.setupAsyncClient(this.options.getBaseUrl(), this.options.getApiKey(),
+						this.options.getTimeout(), this.options.getMaxRetries(), this.options.getProxy(),
+						this.options.getCustomHeaders()));
 
-				ChatResponse chatResponse = toChatResponse(completionEntity.getBody(), accumulatedUsage);
-				observationContext.setResponse(chatResponse);
+		this.observationRegistry = Objects.requireNonNullElse(observationRegistry, ObservationRegistry.NOOP);
+		this.toolCallingManager = Objects.requireNonNullElse(toolCallingManager, DEFAULT_TOOL_CALLING_MANAGER);
+		this.toolExecutionEligibilityPredicate = Objects.requireNonNullElse(toolExecutionEligibilityPredicate,
+				new DefaultToolExecutionEligibilityPredicate());
+	}
 
-				return chatResponse;
-			});
+	/**
+	 * Gets the chat options for this model.
+	 * @return the chat options
+	 */
+	public AnthropicChatOptions getOptions() {
+		return this.options;
+	}
 
-		Assert.state(prompt.getOptions() != null, "prompt.getOptions() must not be null");
-		if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(prompt.getOptions(), response)) {
-			var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
-			if (toolExecutionResult.returnDirect()) {
-				// Return tool execution result directly to the client.
-				return ChatResponse.builder()
-					.from(response)
-					.generations(ToolExecutionResult.buildGenerations(toolExecutionResult))
-					.build();
-			}
-			else {
-				// Send the tool execution result back to the model.
-				return this.internalCall(new Prompt(toolExecutionResult.conversationHistory(), prompt.getOptions()),
-						response);
-			}
-		}
+	/**
+	 * Returns the underlying synchronous Anthropic SDK client. Useful for accessing SDK
+	 * features directly, such as the Files API ({@code client.beta().files()}).
+	 * @return the sync client
+	 */
+	public AnthropicClient getAnthropicClient() {
+		return this.anthropicClient;
+	}
 
-		return response;
+	/**
+	 * Returns the underlying asynchronous Anthropic SDK client. Useful for non-blocking
+	 * access to SDK features directly, such as the Files API.
+	 * @return the async client
+	 */
+	public AnthropicClientAsync getAnthropicClientAsync() {
+		return this.anthropicClientAsync;
 	}
 
-	private DefaultUsage getDefaultUsage(AnthropicApi.@Nullable Usage usage) {
-		Integer inputTokens = usage != null && usage.inputTokens() != null ? usage.inputTokens() : 0;
-		Integer outputTokens = usage != null && usage.outputTokens() != null ? usage.outputTokens() : 0;
-		return new DefaultUsage(inputTokens, outputTokens, inputTokens + outputTokens, usage);
+	/**
+	 * Executes a blocking chat completion for the given prompt. The sync client needs no
+	 * null check here: it is a final field that the constructor always initializes.
+	 */
+	@Override
+	public ChatResponse call(Prompt prompt) {
+		return this.internalCall(buildRequestPrompt(prompt), null);
 	}
 
 	@Override
 	public Flux<ChatResponse> stream(Prompt prompt) {
-		// Before moving any further, build the final request Prompt,
-		// merging runtime and default options.
 		Prompt requestPrompt = buildRequestPrompt(prompt);
-		return this.internalStream(requestPrompt, null);
+		return internalStream(requestPrompt, null);
 	}
 
-	private Flux<ChatResponse> internalStream(Prompt prompt, @Nullable ChatResponse previousChatResponse) {
+	/**
+	 * Internal method to handle streaming chat completion calls with tool execution
+	 * support. This method is called recursively to support multi-turn tool calling.
+	 * @param prompt The prompt for the chat completion. In a recursive tool-call
+	 * scenario, this prompt will contain the full conversation history including the tool
+	 * results.
+	 * @param previousChatResponse The chat response from the preceding API call. This is
+	 * used to accumulate token usage correctly across multiple API calls in a single user
+	 * turn.
+	 * @return A {@link Flux} of {@link ChatResponse} events, which can include text
+	 * chunks and the final response with tool call information or the model's final
+	 * answer.
+	 */
+	public Flux<ChatResponse> internalStream(Prompt prompt, @Nullable ChatResponse previousChatResponse) {
+
 		return Flux.deferContextual(contextView -> {
-			ChatCompletionRequest request = createRequest(prompt, true);
+			MessageCreateParams request = createRequest(prompt, true);
 
 			ChatModelObservationContext observationContext = ChatModelObservationContext.builder()
 				.prompt(prompt)
-				.provider(AnthropicApi.PROVIDER_NAME)
+				.provider(AiProvider.ANTHROPIC.value())
 				.build();
 
 			Observation observation = ChatModelObservationDocumentation.CHAT_MODEL_OPERATION.observation(
@@ -259,327 +263,327 @@ private Flux<ChatResponse> internalStream(Prompt prompt, @Nullable ChatResponse
 
 			observation.parentObservation(contextView.getOrDefault(ObservationThreadLocalAccessor.KEY, null)).start();
 
-			Flux<ChatCompletionResponse> response = this.anthropicApi.chatCompletionStream(request,
-					this.getAdditionalHttpHeaders(prompt));
-
-			// @formatter:off
-			Flux<ChatResponse> chatResponseFlux = response.flatMap(chatCompletionResponse -> {
-				AnthropicApi.Usage usage = chatCompletionResponse.usage();
-				Usage currentChatResponseUsage = usage != null ? this.getDefaultUsage(usage) : new EmptyUsage();
-				Usage accumulatedUsage = UsageCalculator.getCumulativeUsage(currentChatResponseUsage, previousChatResponse);
-				ChatResponse chatResponse = toChatResponse(chatCompletionResponse, accumulatedUsage);
-
-				ChatOptions options = prompt.getOptions();
-				Assert.notNull(options, "prompt.getOptions() must not be null");
-				if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(options, chatResponse)) {
-
-					if (chatResponse.hasFinishReasons(Set.of("tool_use"))) {
-						// FIXME: bounded elastic needs to be used since tool calling
-						//  is currently only synchronous
-						return Flux.deferContextual(ctx -> {
-							// TODO: factor out the tool execution logic with setting context into a utility.
-							ToolExecutionResult toolExecutionResult;
-							try {
-								ToolCallReactiveContextHolder.setContext(ctx);
-								toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, chatResponse);
-							}
-							finally {
-								ToolCallReactiveContextHolder.clearContext();
-							}
-							if (toolExecutionResult.returnDirect()) {
-								// Return tool execution result directly to the client.
-								return Flux.just(ChatResponse.builder().from(chatResponse)
-									.generations(ToolExecutionResult.buildGenerations(toolExecutionResult))
-									.build());
-							}
-							else {
-								// Send the tool execution result back to the model.
-								return this.internalStream(new Prompt(toolExecutionResult.conversationHistory(), prompt.getOptions()),
-										chatResponse);
-							}
-						}).subscribeOn(Schedulers.boundedElastic());
+			// Track streaming state for usage accumulation and tool calls
+			StreamingState streamingState = new StreamingState();
+
+			Flux<ChatResponse> chatResponseFlux = Flux.create(sink -> {
+				this.anthropicClientAsync.messages().createStreaming(request).subscribe(event -> {
+					try {
+						ChatResponse chatResponse = convertStreamEventToChatResponse(event, previousChatResponse,
+								streamingState);
+						if (chatResponse != null) {
+							sink.next(chatResponse);
+						}
+					}
+					catch (Exception e) {
+						logger.error("Error processing streaming event", e);
+						sink.error(e);
+					}
+				}).onCompleteFuture().whenComplete((result, throwable) -> {
+					if (throwable != null) {
+						sink.error(throwable);
 					}
 					else {
-						return Mono.empty();
+						sink.complete();
 					}
-				}
-				else {
-					// If internal tool execution is not required, just return the chat response.
-					return Mono.just(chatResponse);
-				}
-			})
-			.doOnError(observation::error)
-			.doFinally(s -> observation.stop())
-			.contextWrite(ctx -> ctx.put(ObservationThreadLocalAccessor.KEY, observation));
+				});
+			});
+
+			// @formatter:off
+			Flux<ChatResponse> flux = chatResponseFlux
+				.doOnError(observation::error)
+				.doFinally(s -> observation.stop())
+				.contextWrite(ctx -> ctx.put(ObservationThreadLocalAccessor.KEY, observation));
 			// @formatter:on
 
-			return new MessageAggregator().aggregate(chatResponseFlux, observationContext::setResponse);
+			// Aggregate streaming responses and handle tool execution on final response
+			return new MessageAggregator().aggregate(flux, observationContext::setResponse)
+				.flatMap(chatResponse -> handleStreamingToolExecution(prompt, chatResponse));
 		});
 	}
 
-	private ChatResponse toChatResponse(@Nullable ChatCompletionResponse chatCompletion, Usage usage) {
+	/**
+	 * Decides what to emit downstream for a response produced by the streaming call:
+	 * the response itself, the direct result of the requested tools, or a follow-up
+	 * stream obtained by sending the tool results back to the model.
+	 * <p>
+	 * Outcomes, in the order they are checked:
+	 * <ul>
+	 * <li>prompt options are absent, or the eligibility predicate reports that no tool
+	 * execution is required: the response is passed through unchanged;</li>
+	 * <li>tool execution is required but the finish reason is not {@code tool_use}:
+	 * nothing is emitted;</li>
+	 * <li>otherwise the tools are executed and either their result is returned directly
+	 * or {@link #internalStream(Prompt, ChatResponse)} is invoked again.</li>
+	 * </ul>
+	 * @param prompt the prompt of the call that produced {@code chatResponse}; its
+	 * options are reused for the follow-up call
+	 * @param chatResponse the response to inspect for tool call requests
+	 * @return a {@link Flux} with the pass-through response, the direct tool result, the
+	 * follow-up stream, or no elements at all
+	 */
+	private Flux<ChatResponse> handleStreamingToolExecution(Prompt prompt, ChatResponse chatResponse) {
+		ChatOptions promptOptions = prompt.getOptions();
+		boolean toolExecutionRequired = promptOptions != null
+				&& this.toolExecutionEligibilityPredicate.isToolExecutionRequired(promptOptions, chatResponse);
+		if (!toolExecutionRequired) {
+			// No tool execution needed - pass through the response
+			return Flux.just(chatResponse);
+		}
+
+		// Only execute tools when the model's stated reason for stopping is that it
+		// wants to use a tool; any other response is skipped.
+		if (!chatResponse.hasFinishReasons(java.util.Set.of("tool_use"))) {
+			return Flux.empty();
+		}
+
+		Flux<ChatResponse> toolExecutionFlux = Flux.deferContextual(contextView -> {
+			ToolExecutionResult executionResult;
+			try {
+				// The holder carries the Reactor context only while the tools run.
+				org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder.setContext(contextView);
+				executionResult = this.toolCallingManager.executeToolCalls(prompt, chatResponse);
+			}
+			finally {
+				org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder.clearContext();
+			}
+
+			if (!executionResult.returnDirect()) {
+				// Recursive call: stream a new request whose prompt carries the full
+				// conversation history, including the tool results. The current response
+				// is passed along so that usage is accumulated.
+				Prompt followUpPrompt = new Prompt(executionResult.conversationHistory(), prompt.getOptions());
+				return this.internalStream(followUpPrompt, chatResponse);
+			}
+			// Return tool execution result directly to the client
+			return Flux.just(ChatResponse.builder()
+				.from(chatResponse)
+				.generations(ToolExecutionResult.buildGenerations(executionResult))
+				.build());
+		});
+		// Tool execution is blocking, so subscribe on a different thread.
+		return toolExecutionFlux.subscribeOn(reactor.core.scheduler.Schedulers.boundedElastic());
+	}
 
-		if (chatCompletion == null) {
-			logger.warn("Null chat completion returned");
-			return new ChatResponse(List.of());
+	/**
+	 * Converts a streaming event to a ChatResponse. Text, thinking, signature and redacted
+	 * thinking are emitted at once; tool calls and citations are reported with message_delta.
+	 * @param event the raw message stream event
+	 * @param previousChatResponse the previous chat response for usage accumulation
+	 * @param streamingState the state accumulated during streaming
+	 * @return the chat response, or null if the event doesn't produce a response
+	 */
+	private @Nullable ChatResponse convertStreamEventToChatResponse(RawMessageStreamEvent event,
+			@Nullable ChatResponse previousChatResponse, StreamingState streamingState) {
+
+		// -- Event: message_start --
+		// Captures message ID, model, and input tokens from the first event.
+		if (event.messageStart().isPresent()) {
+			var startEvent = event.messageStart().get();
+			var message = startEvent.message();
+			streamingState.setMessageInfo(message.id(), message.model().asString(), message.usage().inputTokens());
+			return null;
 		}
 
-		List<Generation> generations = new ArrayList<>();
-		List<AssistantMessage.ToolCall> toolCalls = new ArrayList<>();
-		CitationContext citationContext = new CitationContext();
-		for (ContentBlock content : chatCompletion.content()) {
-			switch (content.type()) {
-				case TEXT, TEXT_DELTA:
-					Generation textGeneration = processTextContent(content, chatCompletion.stopReason(),
-							citationContext);
-					generations.add(textGeneration);
-					break;
-				case THINKING:
-					Map<String, Object> thinkingProperties = new HashMap<>();
-					String signature = content.signature();
-					Assert.notNull(signature, "The signature of the content can't be null");
-					Assert.notNull(content.thinking(), "The thinking of the content can't be null");
-					thinkingProperties.put("signature", signature);
-					generations.add(new Generation(
-							AssistantMessage.builder()
-								.content(content.thinking())
-								.properties(thinkingProperties)
-								.build(),
-							ChatGenerationMetadata.builder().finishReason(chatCompletion.stopReason()).build()));
-					break;
-				case THINKING_DELTA:
-					Assert.notNull(content.thinking(), "The thinking of the content can't be null");
-					generations.add(new Generation(AssistantMessage.builder().content(content.thinking()).build(),
-							ChatGenerationMetadata.builder().finishReason(chatCompletion.stopReason()).build()));
-					break;
-				case SIGNATURE_DELTA:
-					Map<String, Object> signatureProperties = new HashMap<>();
-					String sig = content.signature();
-					Assert.notNull(sig, "The signature of the content can't be null");
-					signatureProperties.put("signature", sig);
-					generations.add(new Generation(
-							AssistantMessage.builder().content("").properties(signatureProperties).build(),
-							ChatGenerationMetadata.builder().finishReason(chatCompletion.stopReason()).build()));
-					break;
-				case REDACTED_THINKING:
-					Map<String, Object> redactedProperties = new HashMap<>();
-					String data = content.data();
-					Assert.notNull(data, "The data of the content must not be null");
-					redactedProperties.put("data", data);
-					generations.add(new Generation(AssistantMessage.builder().properties(redactedProperties).build(),
-							ChatGenerationMetadata.builder().finishReason(chatCompletion.stopReason()).build()));
-					break;
-				case TOOL_USE:
-					var functionCallId = content.id();
-					Assert.notNull(functionCallId, "The id of the content must not be null");
-					var functionName = content.name();
-					Assert.notNull(functionName, "The name of the content must not be null");
-					var functionArguments = JsonParser.toJson(content.input());
-					toolCalls.add(
-							new AssistantMessage.ToolCall(functionCallId, "function", functionName, functionArguments));
-					break;
-				default:
-					logger.warn("Unsupported content block type: {}", content.type());
+		// -- Event: content_block_start --
+		// Initializes tool call tracking or emits redacted thinking blocks.
+		if (event.contentBlockStart().isPresent()) {
+			var startEvent = event.contentBlockStart().get();
+			var contentBlock = startEvent.contentBlock();
+			if (contentBlock.toolUse().isPresent()) {
+				var toolUseBlock = contentBlock.asToolUse();
+				streamingState.startToolUse(toolUseBlock.id(), toolUseBlock.name());
 			}
+			else if (contentBlock.isRedactedThinking()) {
+				// Emit redacted thinking block immediately
+				RedactedThinkingBlock redactedBlock = contentBlock.asRedactedThinking();
+				Map<String, Object> redactedProperties = new HashMap<>();
+				redactedProperties.put("data", redactedBlock.data());
+				AssistantMessage assistantMessage = AssistantMessage.builder().properties(redactedProperties).build();
+				return new ChatResponse(List.of(new Generation(assistantMessage)));
+			}
+			return null;
 		}
 
-		if (chatCompletion.stopReason() != null && generations.isEmpty()) {
-			Generation generation = new Generation(AssistantMessage.builder().content("").properties(Map.of()).build(),
-					ChatGenerationMetadata.builder().finishReason(chatCompletion.stopReason()).build());
-			generations.add(generation);
-		}
+		// -- Event: content_block_delta --
+		// Handles incremental text, tool argument JSON, thinking, and citation deltas.
+		if (event.contentBlockDelta().isPresent()) {
+			var deltaEvent = event.contentBlockDelta().get();
+			var delta = deltaEvent.delta();
+
+			// Text chunk — emit immediately
+			if (delta.text().isPresent()) {
+				String text = delta.asText().text();
+				AssistantMessage assistantMessage = AssistantMessage.builder().content(text).build();
+				Generation generation = new Generation(assistantMessage);
+				return new ChatResponse(List.of(generation));
+			}
 
-		if (!CollectionUtils.isEmpty(toolCalls)) {
-			AssistantMessage assistantMessage = AssistantMessage.builder()
-				.content("")
-				.properties(Map.of())
-				.toolCalls(toolCalls)
-				.build();
-			Generation toolCallGeneration = new Generation(assistantMessage,
-					ChatGenerationMetadata.builder().finishReason(chatCompletion.stopReason()).build());
-			generations.add(toolCallGeneration);
-		}
+			// Tool argument JSON chunk — accumulate for later
+			if (delta.inputJson().isPresent()) {
+				String partialJson = delta.asInputJson().partialJson();
+				streamingState.appendToolJson(partialJson);
+				return null;
+			}
 
-		// Create response metadata with citation information if present
-		ChatResponseMetadata.Builder metadataBuilder = ChatResponseMetadata.builder()
-			.id(chatCompletion.id())
-			.model(chatCompletion.model())
-			.usage(usage)
-			.keyValue("stop-reason", chatCompletion.stopReason())
-			.keyValue("stop-sequence", chatCompletion.stopSequence())
-			.keyValue("type", chatCompletion.type())
-			.keyValue("anthropic-response", chatCompletion);
+			// Thinking chunk — emit with thinking metadata
+			if (delta.isThinking()) {
+				String thinkingText = delta.asThinking().thinking();
+				Map<String, Object> thinkingProperties = new HashMap<>();
+				thinkingProperties.put("thinking", Boolean.TRUE);
+				AssistantMessage assistantMessage = AssistantMessage.builder()
+					.content(thinkingText)
+					.properties(thinkingProperties)
+					.build();
+				return new ChatResponse(List.of(new Generation(assistantMessage)));
+			}
+
+			// Thinking signature — emit with signature metadata
+			if (delta.isSignature()) {
+				String signature = delta.asSignature().signature();
+				Map<String, Object> signatureProperties = new HashMap<>();
+				signatureProperties.put("signature", signature);
+				AssistantMessage assistantMessage = AssistantMessage.builder().properties(signatureProperties).build();
+				return new ChatResponse(List.of(new Generation(assistantMessage)));
+			}
 
-		// Add citation metadata if citations were found
-		if (citationContext.hasCitations()) {
-			metadataBuilder.keyValue("citations", citationContext.getAllCitations())
-				.keyValue("citationCount", citationContext.getTotalCitationCount());
+			// Citation — accumulate for final response metadata
+			if (delta.isCitations()) {
+				CitationsDelta citationsDelta = delta.asCitations();
+				Citation citation = convertStreamingCitation(citationsDelta.citation());
+				if (citation != null) {
+					streamingState.addCitation(citation);
+				}
+				return null;
+			}
 		}
 
-		ChatResponseMetadata responseMetadata = metadataBuilder.build();
+		// -- Event: content_block_stop --
+		// Finalizes the current tool call if one was being tracked.
+		if (event.contentBlockStop().isPresent()) {
+			if (streamingState.isTrackingToolUse()) {
+				streamingState.finishToolUse();
+			}
+			return null;
+		}
 
-		return new ChatResponse(generations, responseMetadata);
-	}
+		// -- Event: message_delta --
+		// Carries stop_reason and usage; the response built here exposes any completed tool calls.
+		Optional<ChatResponse> messageDeltaResponse = event.messageDelta().map(deltaEvent -> {
+			String stopReason = deltaEvent.delta().stopReason().map(r -> r.toString()).orElse("");
+			ChatGenerationMetadata metadata = ChatGenerationMetadata.builder().finishReason(stopReason).build();
+
+			// Build assistant message with any accumulated tool calls
+			AssistantMessage.Builder assistantMessageBuilder = AssistantMessage.builder().content("");
+			List<ToolCall> toolCalls = streamingState.getCompletedToolCalls();
+			if (!toolCalls.isEmpty()) {
+				assistantMessageBuilder.toolCalls(toolCalls);
+			}
 
-	private Generation processTextContent(ContentBlock content, @Nullable String stopReason,
-			CitationContext citationContext) {
-		// Extract citations if present in the content block
-		if (content.citations() instanceof List) {
-			try {
-				@SuppressWarnings("unchecked")
-				List<Object> citationObjects = (List<Object>) content.citations();
+			Generation generation = new Generation(assistantMessageBuilder.build(), metadata);
 
-				List<Citation> citations = new ArrayList<>();
-				for (Object citationObj : citationObjects) {
-					if (citationObj instanceof Map) {
-						// Convert Map to CitationResponse using manual parsing
-						AnthropicApi.CitationResponse citationResponse = parseCitationFromMap((Map<?, ?>) citationObj);
-						citations.add(convertToCitation(citationResponse));
-					}
-					else {
-						logger.warn("Unexpected citation object type: {}. Expected Map but got: {}. Skipping citation.",
-								citationObj.getClass().getName(), citationObj);
-					}
-				}
+			// Combine input tokens from message_start with output tokens from
+			// message_delta
+			long inputTokens = streamingState.getInputTokens();
+			long outputTokens = deltaEvent.usage().outputTokens();
+			Usage usage = new DefaultUsage(Math.toIntExact(inputTokens), Math.toIntExact(outputTokens),
+					Math.toIntExact(inputTokens + outputTokens), deltaEvent.usage());
 
-				if (!citations.isEmpty()) {
-					citationContext.addCitations(citations);
-				}
+			Usage accumulatedUsage = previousChatResponse != null
+					? UsageCalculator.getCumulativeUsage(usage, previousChatResponse) : usage;
 
+			ChatResponseMetadata.Builder metadataBuilder = ChatResponseMetadata.builder()
+				.id(streamingState.getMessageId())
+				.model(streamingState.getModel())
+				.usage(accumulatedUsage);
+
+			List<Citation> citations = streamingState.getCitations();
+			if (!citations.isEmpty()) {
+				metadataBuilder.keyValue("citations", citations).keyValue("citationCount", citations.size());
 			}
-			catch (Exception e) {
-				logger.warn("Failed to parse citations from content block", e);
-			}
-		}
 
-		return new Generation(new AssistantMessage(content.text()),
-				ChatGenerationMetadata.builder().finishReason(stopReason).build());
-	}
+			return new ChatResponse(List.of(generation), metadataBuilder.build());
+		});
 
-	/**
-	 * Parse citation data from Map (typically from JSON deserialization). Assumes all
-	 * required fields are present and of correct types.
-	 * @param citationMap the map containing citation data from API response
-	 * @return parsed CitationResponse
-	 */
-	private AnthropicApi.CitationResponse parseCitationFromMap(Map<?, ?> citationMap) {
-		String type = (String) citationMap.get("type");
-		Assert.notNull(type, "The citation map must contain a 'type' entry");
-		String citedText = (String) citationMap.get("cited_text");
-		Assert.notNull(citedText, "The citation map must contain a 'cited_text' entry");
-		Integer documentIndex = (Integer) citationMap.get("document_index");
-		Assert.notNull(documentIndex, "The citation map must contain a 'document_index' entry");
-
-		String documentTitle = (String) citationMap.get("document_title");
-		Integer startCharIndex = (Integer) citationMap.get("start_char_index");
-		Integer endCharIndex = (Integer) citationMap.get("end_char_index");
-		Integer startPageNumber = (Integer) citationMap.get("start_page_number");
-		Integer endPageNumber = (Integer) citationMap.get("end_page_number");
-		Integer startBlockIndex = (Integer) citationMap.get("start_block_index");
-		Integer endBlockIndex = (Integer) citationMap.get("end_block_index");
-
-		return new AnthropicApi.CitationResponse(type, citedText, documentIndex, documentTitle, startCharIndex,
-				endCharIndex, startPageNumber, endPageNumber, startBlockIndex, endBlockIndex);
+		return messageDeltaResponse.orElse(null); // null when the event is not a message_delta
 	}
 
 	/**
-	 * Convert CitationResponse to Citation object. This method handles the conversion to
-	 * avoid circular dependencies.
+	 * Internal method to handle synchronous chat completion calls with tool execution
+	 * support. This method is called recursively to support multi-turn tool calling.
+	 * @param prompt The prompt for the chat completion. In a recursive tool-call
+	 * scenario, this prompt will contain the full conversation history including the tool
+	 * results.
+	 * @param previousChatResponse The chat response from the preceding API call. This is
+	 * used to accumulate token usage correctly across multiple API calls in a single user
+	 * turn.
+	 * @return The final {@link ChatResponse} after all tool calls (if any) are resolved.
 	 */
-	private Citation convertToCitation(AnthropicApi.CitationResponse citationResponse) {
-		return switch (citationResponse.type()) {
-			case "char_location" -> {
-				Integer startCharIndex = citationResponse.startCharIndex();
-				Assert.notNull(startCharIndex, "citationResponse.startCharIndex() must not be null");
-				Integer endCharIndex = citationResponse.endCharIndex();
-				Assert.notNull(endCharIndex, "citationResponse.endCharIndex() must not be null");
-				yield Citation.ofCharLocation(citationResponse.citedText(), citationResponse.documentIndex(),
-						citationResponse.documentTitle(), startCharIndex, endCharIndex);
-			}
-			case "page_location" -> {
-				Integer startPageNumber = citationResponse.startPageNumber();
-				Assert.notNull(startPageNumber, "citationResponse.startPageNumber() must not be null");
-				Integer endPageNumber = citationResponse.endPageNumber();
-				Assert.notNull(endPageNumber, "citationResponse.endPageNumber() must not be null");
-				yield Citation.ofPageLocation(citationResponse.citedText(), citationResponse.documentIndex(),
-						citationResponse.documentTitle(), startPageNumber, endPageNumber);
-			}
-			case "content_block_location" -> {
-				Integer startBlockIndex = citationResponse.startBlockIndex();
-				Assert.notNull(startBlockIndex, "citationResponse.startBlockIndex() must not be null");
-				Integer endBlockIndex = citationResponse.endBlockIndex();
-				Assert.notNull(endBlockIndex, "citationResponse.endBlockIndex() must not be null");
-				yield Citation.ofContentBlockLocation(citationResponse.citedText(), citationResponse.documentIndex(),
-						citationResponse.documentTitle(), startBlockIndex, endBlockIndex);
-			}
-			default -> throw new IllegalArgumentException("Unknown citation type: " + citationResponse.type());
-		};
-	}
+	public ChatResponse internalCall(Prompt prompt, @Nullable ChatResponse previousChatResponse) {
 
-	private ChatResponseMetadata from(AnthropicApi.ChatCompletionResponse result) {
-		return from(result, this.getDefaultUsage(result.usage()));
-	}
+		MessageCreateParams request = createRequest(prompt, false);
 
-	private ChatResponseMetadata from(AnthropicApi.ChatCompletionResponse result, Usage usage) {
-		Assert.notNull(result, "Anthropic ChatCompletionResult must not be null");
-		return ChatResponseMetadata.builder()
-			.id(result.id())
-			.model(result.model())
-			.usage(usage)
-			.keyValue("stop-reason", result.stopReason())
-			.keyValue("stop-sequence", result.stopSequence())
-			.keyValue("type", result.type())
+		ChatModelObservationContext observationContext = ChatModelObservationContext.builder()
+			.prompt(prompt)
+			.provider(AiProvider.ANTHROPIC.value())
 			.build();
-	}
 
-	private Source getSourceByMedia(Media media) {
-		String data = this.fromMediaData(media.getData());
+		ChatResponse response = ChatModelObservationDocumentation.CHAT_MODEL_OPERATION
+			.observation(this.observationConvention, DEFAULT_OBSERVATION_CONVENTION, () -> observationContext,
+					this.observationRegistry)
+			.observe(() -> {
 
-		// http is not allowed and redirect not allowed
-		if (data.startsWith("https://")) {
-			return new Source(data);
-		}
-		else {
-			return new Source(media.getMimeType().toString(), data);
-		}
-	}
+				Message message = this.anthropicClient.messages().create(request);
 
-	private String fromMediaData(Object mediaData) {
-		if (mediaData instanceof byte[] bytes) {
-			return Base64.getEncoder().encodeToString(bytes);
-		}
-		else if (mediaData instanceof String text) {
-			return text;
-		}
-		else {
-			throw new IllegalArgumentException("Unsupported media data type: " + mediaData.getClass().getSimpleName());
-		}
+				List<ContentBlock> contentBlocks = message.content();
+				if (contentBlocks.isEmpty()) {
+					logger.warn("No content blocks returned for prompt: {}", prompt);
+					return new ChatResponse(List.of());
+				}
 
-	}
+				List<Citation> citations = new ArrayList<>();
+				List<Generation> generations = buildGenerations(message, citations);
 
-	private Type getContentBlockTypeByMedia(Media media) {
-		String mimeType = media.getMimeType().toString();
-		if (mimeType.startsWith("image")) {
-			return Type.IMAGE;
-		}
-		else if (mimeType.contains("pdf")) {
-			return Type.DOCUMENT;
-		}
-		throw new IllegalArgumentException("Unsupported media type: " + mimeType
-				+ ". Supported types are: images (image/*) and PDF documents (application/pdf)");
-	}
+				// Usage of this call, combined with the previous response's usage when present
+				com.anthropic.models.messages.Usage sdkUsage = message.usage();
+				Usage currentChatResponseUsage = getDefaultUsage(sdkUsage);
+				Usage accumulatedUsage = previousChatResponse != null
+						? UsageCalculator.getCumulativeUsage(currentChatResponseUsage, previousChatResponse)
+						: currentChatResponseUsage;
 
-	private HttpHeaders getAdditionalHttpHeaders(Prompt prompt) {
+				ChatResponse chatResponse = new ChatResponse(generations, from(message, accumulatedUsage, citations));
 
-		Map<String, String> headers = new HashMap<>(this.defaultOptions.getHttpHeaders());
-		if (prompt.getOptions() != null && prompt.getOptions() instanceof AnthropicChatOptions chatOptions) {
-			headers.putAll(chatOptions.getHttpHeaders());
+				observationContext.setResponse(chatResponse);
+
+				return chatResponse;
+			});
+
+		ChatOptions promptOptions = prompt.getOptions();
+		if (promptOptions != null
+				&& this.toolExecutionEligibilityPredicate.isToolExecutionRequired(promptOptions, response)) {
+			var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+			if (toolExecutionResult.returnDirect()) {
+				// Return tool execution result directly to the client.
+				return ChatResponse.builder()
+					.from(response)
+					.generations(ToolExecutionResult.buildGenerations(toolExecutionResult))
+					.build();
+			}
+			else {
+				// Send the tool execution result back to the model; this response is passed along for usage accumulation.
+				return this.internalCall(new Prompt(toolExecutionResult.conversationHistory(), prompt.getOptions()),
+						response);
+			}
 		}
-		HttpHeaders httpHeaders = new HttpHeaders();
-		headers.forEach(httpHeaders::add);
-		return httpHeaders;
+
+		return response;
 	}
 
+	/**
+	 * Builds the request prompt by merging runtime options with default options.
+	 * @param prompt the original prompt
+	 * @return the prompt with merged options
+	 */
 	Prompt buildRequestPrompt(Prompt prompt) {
 		// Process runtime options
 		AnthropicChatOptions runtimeOptions = null;
@@ -595,521 +599,991 @@ Prompt buildRequestPrompt(Prompt prompt) {
 		}
 
 		// Define request options by merging runtime options and default options
-		AnthropicChatOptions requestOptions = ModelOptionsUtils.merge(runtimeOptions, this.defaultOptions,
-				AnthropicChatOptions.class);
+		AnthropicChatOptions.Builder<?> mergedBuilder = this.options.mutate();
+		if (runtimeOptions != null) {
+			mergedBuilder.combineWith(runtimeOptions.mutate());
+		}
+		AnthropicChatOptions requestOptions = mergedBuilder.build();
 
-		// Merge @JsonIgnore-annotated options explicitly since they are ignored by
-		// Jackson, used by ModelOptionsUtils.
+		// Merge @JsonIgnore fields explicitly (lost during copyToTarget)
 		if (runtimeOptions != null) {
-			if (runtimeOptions.getFrequencyPenalty() != null) {
-				logger.warn("The frequencyPenalty option is not supported by Anthropic API. Ignoring.");
-			}
-			if (runtimeOptions.getPresencePenalty() != null) {
-				logger.warn("The presencePenalty option is not supported by Anthropic API. Ignoring.");
-			}
-			requestOptions.setHttpHeaders(
-					mergeHttpHeaders(runtimeOptions.getHttpHeaders(), this.defaultOptions.getHttpHeaders()));
-			requestOptions.setInternalToolExecutionEnabled(
-					ModelOptionsUtils.mergeOption(runtimeOptions.getInternalToolExecutionEnabled(),
-							this.defaultOptions.getInternalToolExecutionEnabled()));
-			requestOptions.setToolNames(ToolCallingChatOptions.mergeToolNames(runtimeOptions.getToolNames(),
-					this.defaultOptions.getToolNames()));
+			requestOptions.setInternalToolExecutionEnabled(runtimeOptions.getInternalToolExecutionEnabled() != null
+					? runtimeOptions.getInternalToolExecutionEnabled()
+					: this.options.getInternalToolExecutionEnabled());
+			requestOptions.setToolNames(
+					ToolCallingChatOptions.mergeToolNames(runtimeOptions.getToolNames(), this.options.getToolNames()));
 			requestOptions.setToolCallbacks(ToolCallingChatOptions.mergeToolCallbacks(runtimeOptions.getToolCallbacks(),
-					this.defaultOptions.getToolCallbacks()));
+					this.options.getToolCallbacks()));
 			requestOptions.setToolContext(ToolCallingChatOptions.mergeToolContext(runtimeOptions.getToolContext(),
-					this.defaultOptions.getToolContext()));
-
-			// Merge cache options that are Json-ignored
-			requestOptions.setCacheOptions(runtimeOptions.getCacheOptions());
-
-			// Merge citation documents that are Json-ignored
-			if (!runtimeOptions.getCitationDocuments().isEmpty()) {
-				requestOptions.setCitationDocuments(runtimeOptions.getCitationDocuments());
-			}
-			else if (!this.defaultOptions.getCitationDocuments().isEmpty()) {
-				requestOptions.setCitationDocuments(this.defaultOptions.getCitationDocuments());
-			}
-
-			// Merge skillContainer that is Json-ignored
-			if (runtimeOptions.getSkillContainer() != null) {
-				requestOptions.setSkillContainer(runtimeOptions.getSkillContainer());
-			}
-			else if (this.defaultOptions.getSkillContainer() != null) {
-				requestOptions.setSkillContainer(this.defaultOptions.getSkillContainer());
+					this.options.getToolContext()));
+			if (prompt.getOptions() instanceof AnthropicChatOptions originalAnthropicOptions) {
+				if (!originalAnthropicOptions.getCitationDocuments().isEmpty()) {
+					requestOptions
+						.setCitationDocuments(new ArrayList<>(originalAnthropicOptions.getCitationDocuments()));
+				}
+				if (originalAnthropicOptions.getCacheOptions() != null
+						&& originalAnthropicOptions.getCacheOptions().getStrategy() != AnthropicCacheStrategy.NONE) {
+					requestOptions.setCacheOptions(originalAnthropicOptions.getCacheOptions());
+				}
+				if (originalAnthropicOptions.getOutputConfig() != null) {
+					requestOptions.setOutputConfig(originalAnthropicOptions.getOutputConfig());
+				}
+				if (!originalAnthropicOptions.getHttpHeaders().isEmpty()) {
+					requestOptions.setHttpHeaders(new HashMap<>(originalAnthropicOptions.getHttpHeaders()));
+				}
+				if (originalAnthropicOptions.getSkillContainer() != null) {
+					requestOptions.setSkillContainer(originalAnthropicOptions.getSkillContainer());
+				}
 			}
 		}
-		else {
-			requestOptions.setHttpHeaders(this.defaultOptions.getHttpHeaders());
-			requestOptions.setInternalToolExecutionEnabled(this.defaultOptions.getInternalToolExecutionEnabled());
-			requestOptions.setToolNames(this.defaultOptions.getToolNames());
-			requestOptions.setToolCallbacks(this.defaultOptions.getToolCallbacks());
-			requestOptions.setToolContext(this.defaultOptions.getToolContext());
-			requestOptions.setCitationDocuments(this.defaultOptions.getCitationDocuments());
-			requestOptions.setSkillContainer(this.defaultOptions.getSkillContainer());
-		}
 
 		ToolCallingChatOptions.validateToolCallbacks(requestOptions.getToolCallbacks());
 
 		return new Prompt(prompt.getInstructions(), requestOptions);
 	}
 
-	private Map<String, String> mergeHttpHeaders(Map<String, String> runtimeHttpHeaders,
-			Map<String, String> defaultHttpHeaders) {
-		var mergedHttpHeaders = new HashMap<>(defaultHttpHeaders);
-		mergedHttpHeaders.putAll(runtimeHttpHeaders);
-		return mergedHttpHeaders;
-	}
+	/**
+	 * Creates a {@link MessageCreateParams} request from a Spring AI {@link Prompt}. TOOL
+	 * messages become user messages with {@link ToolResultBlockParam} blocks; ASSISTANT
+	 * messages with tool calls become their text (if any) followed by
+	 * {@link ToolUseBlockParam} blocks.
+	 * @param prompt the prompt with message history and options
+	 * @param stream not currently used; sync/async determined by client method
+	 * @return the constructed request parameters
+	 */
+	MessageCreateParams createRequest(Prompt prompt, boolean stream) {
 
-	ChatCompletionRequest createRequest(Prompt prompt, boolean stream) {
+		MessageCreateParams.Builder builder = MessageCreateParams.builder();
 
-		// Get caching strategy and options from the request
-		AnthropicChatOptions requestOptions = null;
-		if (prompt.getOptions() instanceof AnthropicChatOptions) {
-			requestOptions = (AnthropicChatOptions) prompt.getOptions();
-		}
+		ChatOptions options = prompt.getOptions();
+		AnthropicChatOptions requestOptions = options instanceof AnthropicChatOptions anthropicOptions
+				? anthropicOptions : AnthropicChatOptions.builder().build();
 
-		AnthropicCacheOptions cacheOptions = requestOptions != null ? requestOptions.getCacheOptions()
-				: AnthropicCacheOptions.DISABLED;
+		// Set required fields
+		String model = requestOptions.getModel() != null ? requestOptions.getModel() : DEFAULT_MODEL;
+		builder.model(model);
 
-		CacheEligibilityResolver cacheEligibilityResolver = CacheEligibilityResolver.from(cacheOptions);
+		long maxTokens = requestOptions.getMaxTokens() != null ? requestOptions.getMaxTokens() : DEFAULT_MAX_TOKENS;
+		builder.maxTokens(maxTokens);
 
-		// Process system - as array if caching, string otherwise
-		Object systemContent = buildSystemContent(prompt, cacheEligibilityResolver);
+		// Create cache resolver
+		CacheEligibilityResolver cacheResolver = CacheEligibilityResolver.from(requestOptions.getCacheOptions());
 
-		// Build messages WITHOUT blanket cache control - strategic placement only
-		List<AnthropicMessage> userMessages = buildMessages(prompt, cacheEligibilityResolver);
+		// Prepare citation documents for inclusion in the first user message
+		List<AnthropicCitationDocument> citationDocuments = requestOptions.getCitationDocuments();
+		boolean citationDocsAdded = false;
 
-		// Build base request
-		ChatCompletionRequest request = new ChatCompletionRequest(this.defaultOptions.getModel(), userMessages,
-				systemContent, this.defaultOptions.getMaxTokens(), this.defaultOptions.getTemperature(), stream);
+		// Collect system messages and non-system messages separately
+		List<String> systemTexts = new ArrayList<>();
+		List<org.springframework.ai.chat.messages.Message> nonSystemMessages = new ArrayList<>();
+		for (org.springframework.ai.chat.messages.Message message : prompt.getInstructions()) {
+			if (message.getMessageType() == MessageType.SYSTEM) {
+				String text = message.getText();
+				if (text != null) {
+					systemTexts.add(text);
+				}
+			}
+			else {
+				nonSystemMessages.add(message);
+			}
+		}
 
-		// Save toolChoice for later application (after code_execution tool is added)
-		AnthropicApi.ToolChoice savedToolChoice = requestOptions != null ? requestOptions.getToolChoice() : null;
-		AnthropicChatOptions mergeOptions = requestOptions;
-		if (savedToolChoice != null && requestOptions != null) {
-			// Create a copy without toolChoice to avoid premature merge
-			mergeOptions = requestOptions.copy();
-			mergeOptions.setToolChoice(null);
+		// Process system messages with cache support
+		if (!systemTexts.isEmpty()) {
+			if (!cacheResolver.isCachingEnabled()) {
+				// No caching: join all system texts and use simple string format
+				builder.system(String.join("\n\n", systemTexts));
+			}
+			else if (requestOptions.getCacheOptions().isMultiBlockSystemCaching() && systemTexts.size() > 1) {
+				// Multi-block system caching: each text becomes a separate TextBlockParam,
+				// with cache control applied to the second-to-last block.
+				List<TextBlockParam> systemBlocks = new ArrayList<>();
+				for (int i = 0; i < systemTexts.size(); i++) {
+					TextBlockParam.Builder textBlockBuilder = TextBlockParam.builder().text(systemTexts.get(i));
+					if (i == systemTexts.size() - 2) {
+						CacheControlEphemeral cacheControl = cacheResolver.resolve(MessageType.SYSTEM,
+								String.join("\n\n", systemTexts));
+						if (cacheControl != null) {
+							textBlockBuilder.cacheControl(cacheControl);
+							cacheResolver.useCacheBlock();
+						}
+					}
+					systemBlocks.add(textBlockBuilder.build());
+				}
+				builder.systemOfTextBlockParams(systemBlocks);
+			}
+			else {
+				// Single-block system caching: join all texts into one TextBlockParam
+				String joinedText = String.join("\n\n", systemTexts);
+				CacheControlEphemeral cacheControl = cacheResolver.resolve(MessageType.SYSTEM, joinedText);
+				if (cacheControl != null) {
+					builder.systemOfTextBlockParams(
+							List.of(TextBlockParam.builder().text(joinedText).cacheControl(cacheControl).build()));
+					cacheResolver.useCacheBlock();
+				}
+				else {
+					builder.system(joinedText);
+				}
+			}
 		}
 
-		request = ModelOptionsUtils.merge(mergeOptions, request, ChatCompletionRequest.class);
+		// Pre-compute last user message index for CONVERSATION_HISTORY strategy
+		int lastUserIndex = -1;
+		if (cacheResolver.isCachingEnabled()) {
+			for (int i = nonSystemMessages.size() - 1; i >= 0; i--) {
+				if (nonSystemMessages.get(i).getMessageType() == MessageType.USER) {
+					lastUserIndex = i;
+					break;
+				}
+			}
+		}
 
-		// Add the tool definitions with potential caching
-		Assert.state(requestOptions != null, "AnthropicChatOptions must not be null");
-		List<ToolDefinition> toolDefinitions = this.toolCallingManager.resolveToolDefinitions(requestOptions);
-		if (!CollectionUtils.isEmpty(toolDefinitions)) {
-			request = ModelOptionsUtils.merge(request, this.defaultOptions, ChatCompletionRequest.class);
-			List<AnthropicApi.Tool> tools = getFunctionTools(toolDefinitions);
+		// Process non-system messages
+		for (int i = 0; i < nonSystemMessages.size(); i++) {
+			org.springframework.ai.chat.messages.Message message = nonSystemMessages.get(i);
+
+			if (message.getMessageType() == MessageType.USER) {
+				UserMessage userMessage = (UserMessage) message;
+				boolean hasCitationDocs = !citationDocsAdded && !citationDocuments.isEmpty();
+				boolean hasMedia = !CollectionUtils.isEmpty(userMessage.getMedia());
+				boolean isLastUserMessage = (i == lastUserIndex);
+				boolean applyCacheToUser = isLastUserMessage && cacheResolver.isCachingEnabled();
+
+				// Compute cache control for last user message
+				CacheControlEphemeral userCacheControl = null;
+				if (applyCacheToUser) {
+					String combinedText = combineEligibleMessagesText(nonSystemMessages, lastUserIndex);
+					userCacheControl = cacheResolver.resolve(MessageType.USER, combinedText);
+				}
+
+				if (hasCitationDocs || hasMedia || userCacheControl != null) {
+					List<ContentBlockParam> contentBlocks = new ArrayList<>();
+
+					// Prepend citation document blocks to the first user message
+					if (hasCitationDocs) {
+						for (AnthropicCitationDocument doc : citationDocuments) {
+							contentBlocks.add(ContentBlockParam.ofDocument(doc.toDocumentBlockParam()));
+						}
+						citationDocsAdded = true;
+					}
 
-			// Apply caching to tools if strategy includes them
-			tools = addCacheToLastTool(tools, cacheEligibilityResolver);
+					String text = userMessage.getText();
+					if (text != null && !text.isEmpty()) {
+						TextBlockParam.Builder textBlockBuilder = TextBlockParam.builder().text(text);
+						if (userCacheControl != null) {
+							textBlockBuilder.cacheControl(userCacheControl);
+							cacheResolver.useCacheBlock();
+						}
+						contentBlocks.add(ContentBlockParam.ofText(textBlockBuilder.build()));
+					}
+
+					if (hasMedia) {
+						for (Media media : userMessage.getMedia()) {
+							contentBlocks.add(getContentBlockParamByMedia(media));
+						}
+					}
 
-			request = ChatCompletionRequest.from(request).tools(tools).build();
+					builder.addUserMessageOfBlockParams(contentBlocks);
+				}
+				else {
+					String text = message.getText();
+					if (text != null) {
+						builder.addUserMessage(text);
+					}
+				}
+			}
+			else if (message.getMessageType() == MessageType.ASSISTANT) {
+				AssistantMessage assistantMessage = (AssistantMessage) message;
+				if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) {
+					// Keep the text that accompanied the tool calls; blank text blocks are skipped.
+					List<ContentBlockParam> assistantBlocks = new ArrayList<>();
+					String assistantText = assistantMessage.getText();
+					if (assistantText != null && !assistantText.isBlank()) {
+						assistantBlocks
+							.add(ContentBlockParam.ofText(TextBlockParam.builder().text(assistantText).build()));
+					}
+					for (ToolCall toolCall : assistantMessage.getToolCalls()) {
+						assistantBlocks.add(ContentBlockParam.ofToolUse(ToolUseBlockParam.builder()
+							.id(toolCall.id())
+							.name(toolCall.name())
+							.input(buildToolInput(toolCall.arguments()))
+							.build()));
+					}
+					builder.addAssistantMessageOfBlockParams(assistantBlocks);
+				}
+				else {
+					String text = message.getText();
+					if (text != null) {
+						builder.addAssistantMessage(text);
+					}
+				}
+			}
+			else if (message.getMessageType() == MessageType.TOOL) {
+				ToolResponseMessage toolResponseMessage = (ToolResponseMessage) message;
+				List<ContentBlockParam> toolResultBlocks = toolResponseMessage.getResponses()
+					.stream()
+					.map(response -> ContentBlockParam.ofToolResult(ToolResultBlockParam.builder()
+						.toolUseId(response.id())
+						.content(response.responseData())
+						.build()))
+					.toList();
+				builder.addUserMessageOfBlockParams(toolResultBlocks);
+			}
 		}
 
-		// Add Skills container from options if present
-		AnthropicApi.SkillContainer skillContainer = null;
-		if (requestOptions != null && requestOptions.getSkillContainer() != null) {
-			skillContainer = requestOptions.getSkillContainer();
+		// Set optional parameters
+		if (requestOptions.getTemperature() != null) {
+			builder.temperature(requestOptions.getTemperature());
 		}
-		else if (this.defaultOptions.getSkillContainer() != null) {
-			skillContainer = this.defaultOptions.getSkillContainer();
+		if (requestOptions.getTopP() != null) {
+			builder.topP(requestOptions.getTopP());
+		}
+		if (requestOptions.getTopK() != null) {
+			builder.topK(requestOptions.getTopK().longValue());
+		}
+		if (requestOptions.getStopSequences() != null && !requestOptions.getStopSequences().isEmpty()) {
+			builder.stopSequences(requestOptions.getStopSequences());
+		}
+		if (requestOptions.getMetadata() != null) {
+			builder.metadata(requestOptions.getMetadata());
+		}
+		if (requestOptions.getThinking() != null) {
+			builder.thinking(requestOptions.getThinking());
 		}
 
-		if (skillContainer != null) {
-			request = ChatCompletionRequest.from(request).container(skillContainer).build();
-
-			// Skills require the code_execution tool to be enabled
-			// Add it if not already present
-			List<AnthropicApi.Tool> existingTools = request.tools() != null ? new ArrayList<>(request.tools())
-					: new ArrayList<>();
-			boolean hasCodeExecution = existingTools.stream().anyMatch(tool -> "code_execution".equals(tool.name()));
+		// Add output configuration if specified (structured output / effort)
+		if (requestOptions.getOutputConfig() != null) {
+			builder.outputConfig(requestOptions.getOutputConfig());
+		}
 
-			if (!hasCodeExecution) {
-				existingTools
-					.add(new AnthropicApi.Tool(AnthropicApi.CODE_EXECUTION_TOOL_TYPE, "code_execution", null, null));
-				request = ChatCompletionRequest.from(request).tools(existingTools).build();
+		// Add tool definitions if any are configured
+		List<ToolDefinition> toolDefinitions = this.toolCallingManager.resolveToolDefinitions(requestOptions);
+		if (!CollectionUtils.isEmpty(toolDefinitions)) {
+			List<Tool> tools = toolDefinitions.stream().map(this::toAnthropicTool).toList();
+
+			// Apply cache control to the last tool if caching strategy includes tools
+			CacheControlEphemeral toolCacheControl = cacheResolver.resolveToolCacheControl();
+			if (toolCacheControl != null && !tools.isEmpty()) {
+				List<Tool> modifiedTools = new ArrayList<>(tools);
+				int last = modifiedTools.size() - 1;
+				modifiedTools.set(last, modifiedTools.get(last).toBuilder().cacheControl(toolCacheControl).build());
+				cacheResolver.useCacheBlock();
+				tools = modifiedTools;
 			}
 
-			// Apply saved toolChoice now that code_execution tool has been added
-			if (savedToolChoice != null) {
-				request = ChatCompletionRequest.from(request).toolChoice(savedToolChoice).build();
+			builder.tools(tools.stream().map(ToolUnion::ofTool).toList());
+
+			// Set tool choice if specified, applying disableParallelToolUse if set
+			if (requestOptions.getToolChoice() != null) {
+				ToolChoice toolChoice = requestOptions.getToolChoice();
+				if (Boolean.TRUE.equals(requestOptions.getDisableParallelToolUse())) {
+					toolChoice = applyDisableParallelToolUse(toolChoice);
+				}
+				builder.toolChoice(toolChoice);
+			}
+			else if (Boolean.TRUE.equals(requestOptions.getDisableParallelToolUse())) {
+				builder.toolChoice(ToolChoice.ofAuto(ToolChoiceAuto.builder().disableParallelToolUse(true).build()));
 			}
 		}
-		else if (savedToolChoice != null) {
-			// No Skills but toolChoice was set - apply it now
-			request = ChatCompletionRequest.from(request).toolChoice(savedToolChoice).build();
+
+		// Per-request HTTP headers
+		if (!requestOptions.getHttpHeaders().isEmpty()) {
+			requestOptions.getHttpHeaders().forEach((key, value) -> builder.putAdditionalHeader(key, value));
 		}
 
-		// Add beta headers if needed
-		if (requestOptions != null) {
-			Map<String, String> headers = new HashMap<>(requestOptions.getHttpHeaders());
-			boolean needsUpdate = false;
+		// Skills support
+		AnthropicSkillContainer skillContainer = requestOptions.getSkillContainer();
+		if (skillContainer == null && this.options.getSkillContainer() != null) {
+			skillContainer = this.options.getSkillContainer();
+		}
+		if (skillContainer != null) {
+			// Add container with skills config
+			builder.putAdditionalBodyProperty("container",
+					JsonValue.from(Map.of("skills", skillContainer.toSkillsList())));
 
-			// Add Skills beta headers if Skills are present
-			// Skills require three beta headers: skills, code-execution, and files-api
-			if (skillContainer != null) {
-				String existingBeta = headers.get("anthropic-beta");
-				String requiredBetas = AnthropicApi.BETA_SKILLS + "," + AnthropicApi.BETA_CODE_EXECUTION + ","
-						+ AnthropicApi.BETA_FILES_API;
+			// Add code execution tool if not already present in user-defined tools
+			boolean hasCodeExecution = !CollectionUtils.isEmpty(toolDefinitions)
+					&& toolDefinitions.stream().anyMatch(td -> td.name().contains("code_execution"));
+			if (!hasCodeExecution) {
+				builder.addTool(CodeExecutionTool20260120.builder().build());
+			}
 
-				if (existingBeta != null) {
-					// Add missing beta headers
-					if (!existingBeta.contains(AnthropicApi.BETA_SKILLS)) {
-						existingBeta = existingBeta + "," + AnthropicApi.BETA_SKILLS;
-					}
-					if (!existingBeta.contains(AnthropicApi.BETA_CODE_EXECUTION)) {
-						existingBeta = existingBeta + "," + AnthropicApi.BETA_CODE_EXECUTION;
-					}
-					if (!existingBeta.contains(AnthropicApi.BETA_FILES_API)) {
-						existingBeta = existingBeta + "," + AnthropicApi.BETA_FILES_API;
-					}
-					headers.put("anthropic-beta", existingBeta);
+			// Add beta headers; a caller-supplied anthropic-beta value is replaced by the merged one
+			String existingBeta = requestOptions.getHttpHeaders().get("anthropic-beta");
+			if (existingBeta != null) {
+				StringBuilder merged = new StringBuilder(existingBeta);
+				if (!existingBeta.contains(BETA_SKILLS)) {
+					merged.append(",").append(BETA_SKILLS);
 				}
-				else {
-					headers.put("anthropic-beta", requiredBetas);
+				if (!existingBeta.contains(BETA_CODE_EXECUTION)) {
+					merged.append(",").append(BETA_CODE_EXECUTION);
+				}
+				if (!existingBeta.contains(BETA_FILES_API)) {
+					merged.append(",").append(BETA_FILES_API);
 				}
-				needsUpdate = true;
+				builder.replaceAdditionalHeaders("anthropic-beta", merged.toString());
 			}
+			else {
+				builder.putAdditionalHeader("anthropic-beta",
+						BETA_SKILLS + "," + BETA_CODE_EXECUTION + "," + BETA_FILES_API);
+			}
+		}
 
-			// Add extended cache TTL beta header if needed
-			if (cacheOptions.getMessageTypeTtl().containsValue(AnthropicCacheTtl.ONE_HOUR)) {
-				String existingBeta = headers.get("anthropic-beta");
-				if (existingBeta != null && !existingBeta.contains(AnthropicApi.BETA_EXTENDED_CACHE_TTL)) {
-					headers.put("anthropic-beta", existingBeta + "," + AnthropicApi.BETA_EXTENDED_CACHE_TTL);
-				}
-				else if (existingBeta == null) {
-					headers.put("anthropic-beta", AnthropicApi.BETA_EXTENDED_CACHE_TTL);
-				}
-				needsUpdate = true;
+		return builder.build();
+	}
+
+	/**
+	 * Concatenates the text of every message from the start of the list through the
+	 * given index; used for the CONVERSATION_HISTORY cache eligibility length check.
+	 * @param messages the list of non-system messages
+	 * @param lastUserIndex the index of the last user message (inclusive)
+	 * @return the concatenated text; messages without text contribute nothing
+	 */
+	private String combineEligibleMessagesText(List<org.springframework.ai.chat.messages.Message> messages,
+			int lastUserIndex) {
+		StringBuilder buffer = new StringBuilder();
+		for (int idx = 0, bound = Math.min(lastUserIndex, messages.size() - 1); idx <= bound; idx++) {
+			String part = messages.get(idx).getText();
+			if (part != null) {
+				buffer.append(part);
 			}
+		}
+		return buffer.toString();
+	}
 
-			if (needsUpdate) {
-				requestOptions.setHttpHeaders(headers);
+	/**
+	 * Builds generations from the Anthropic message response. Each thinking or redacted
+	 * thinking block yields its own generation; text and tool calls share the final one.
+	 * @param message the Anthropic message response
+	 * @param citationAccumulator mutable list that receives citations found in text blocks
+	 * @return thinking generations (if any) followed by one text/tool-call generation
+	 */
+	private List<Generation> buildGenerations(Message message, List<Citation> citationAccumulator) {
+		List<Generation> generations = new ArrayList<>();
+
+		String finishReason = message.stopReason().map(r -> r.toString()).orElse(""); // "" if no stop reason
+		ChatGenerationMetadata generationMetadata = ChatGenerationMetadata.builder().finishReason(finishReason).build();
+
+		// Text and tool calls from all blocks are merged into the single final generation
+		StringBuilder textContent = new StringBuilder();
+		List<ToolCall> toolCalls = new ArrayList<>();
+
+		for (ContentBlock block : message.content()) {
+			if (block.isText()) {
+				TextBlock textBlock = block.asText();
+				textContent.append(textBlock.text());
+
+				// Forward this block's citations, skipping any that convert to null
+				textBlock.citations().ifPresent(textCitations -> {
+					for (TextCitation tc : textCitations) {
+						Citation citation = convertTextCitation(tc);
+						if (citation != null) {
+							citationAccumulator.add(citation);
+						}
+					}
+				});
+			}
+			else if (block.isToolUse()) {
+				ToolUseBlock toolUseBlock = block.asToolUse();
+				// ToolUseBlock._input() returns JsonValue, which needs to be converted
+				// to a JSON string via the visitor pattern since JsonValue.toString()
+				// produces Java Map format ("{key=value}"), not valid JSON.
+				String arguments = convertJsonValueToString(toolUseBlock._input());
+				toolCalls.add(new ToolCall(toolUseBlock.id(), "function", toolUseBlock.name(), arguments));
+			}
+			else if (block.isThinking()) {
+				// ThinkingBlock: stored as a separate Generation with the thinking
+				// text as content and signature in metadata properties.
+				ThinkingBlock thinkingBlock = block.asThinking();
+				Map<String, Object> thinkingProperties = new HashMap<>();
+				thinkingProperties.put("signature", thinkingBlock.signature());
+				generations.add(new Generation(AssistantMessage.builder()
+					.content(thinkingBlock.thinking())
+					.properties(thinkingProperties)
+					.build(), generationMetadata));
+			}
+			else if (block.isRedactedThinking()) {
+				// RedactedThinkingBlock: safety-redacted reasoning with a data marker.
+				RedactedThinkingBlock redactedBlock = block.asRedactedThinking();
+				Map<String, Object> redactedProperties = new HashMap<>();
+				redactedProperties.put("data", redactedBlock.data());
+				generations.add(new Generation(AssistantMessage.builder().properties(redactedProperties).build(),
+						generationMetadata));
 			}
+			else if (block.isContainerUpload() || block.isServerToolUse() || block.isBashCodeExecutionToolResult()
+					|| block.isTextEditorCodeExecutionToolResult() || block.isCodeExecutionToolResult()) {
+				logger.warn("Unsupported content block type: {}", block);
+			}
+		}
+
+		AssistantMessage.Builder assistantMessageBuilder = AssistantMessage.builder().content(textContent.toString());
+
+		if (!toolCalls.isEmpty()) {
+			assistantMessageBuilder.toolCalls(toolCalls);
 		}
 
-		return request;
+		generations.add(new Generation(assistantMessageBuilder.build(), generationMetadata));
+
+		return generations;
 	}
 
 	/**
-	 * Helper method to serialize content from ContentBlock. The content field can be
-	 * either a String or a complex object (for Skills responses).
-	 * @param content The content to serialize
-	 * @return String representation of the content, or null if content is null
+	 * Assembles response metadata: id, model, usage, the raw SDK message and citations.
+	 * @param message the Anthropic message; must not be null
+	 * @param usage the usage to report
+	 * @param citations citations to publish; no citation keys are written when empty
+	 * @return the chat response metadata
 	 */
-	private static @Nullable String serializeContent(@Nullable Object content) {
-		if (content == null) {
-			return null;
+	private ChatResponseMetadata from(Message message, Usage usage, List<Citation> citations) {
+		Assert.notNull(message, "Anthropic Message must not be null");
+		ChatResponseMetadata.Builder responseMetadata = ChatResponseMetadata.builder();
+		responseMetadata.id(message.id()).usage(usage).model(message.model().asString());
+		responseMetadata.keyValue("anthropic-response", message);
+		if (!citations.isEmpty()) {
+			responseMetadata.keyValue("citations", citations);
+			responseMetadata.keyValue("citationCount", citations.size());
 		}
-		if (content instanceof String s) {
-			return s;
+		return responseMetadata.build();
+	}
+
+	/**
+	 * Maps the SDK usage onto Spring AI usage; the total is input plus output tokens.
+	 * @param usage the Anthropic SDK usage, may be null
+	 * @return an {@link EmptyUsage} for null input, otherwise a {@link DefaultUsage}
+	 */
+	private Usage getDefaultUsage(com.anthropic.models.messages.Usage usage) {
+		if (usage == null) {
+			return new EmptyUsage();
 		}
-		return JsonParser.toJson(content);
+		int promptTokens = Math.toIntExact(usage.inputTokens());
+		int completionTokens = Math.toIntExact(usage.outputTokens());
+		int totalTokens = Math.addExact(promptTokens, completionTokens);
+		return new DefaultUsage(promptTokens, completionTokens, totalTokens, usage);
 	}
 
-	private static ContentBlock cacheAwareContentBlock(ContentBlock contentBlock, MessageType messageType,
-			CacheEligibilityResolver cacheEligibilityResolver) {
-		String basisForLength = switch (contentBlock.type()) {
-			case TEXT, TEXT_DELTA -> contentBlock.text();
-			case TOOL_RESULT -> serializeContent(contentBlock.content());
-			case TOOL_USE -> JsonParser.toJson(contentBlock.input());
-			case THINKING, THINKING_DELTA -> contentBlock.thinking();
-			case REDACTED_THINKING -> contentBlock.data();
-			default -> null;
-		};
-		return cacheAwareContentBlock(contentBlock, messageType, cacheEligibilityResolver, basisForLength);
+	/** Maps a message text citation to a {@link Citation}; unmatched location types give null. */
+	private @Nullable Citation convertTextCitation(TextCitation textCitation) {
+		if (textCitation.isCharLocation()) {
+			return fromCharLocation(textCitation.asCharLocation());
+		}
+		if (textCitation.isPageLocation()) {
+			return fromPageLocation(textCitation.asPageLocation());
+		}
+		// Content-block location is the last variant handled here; anything else yields null
+		boolean contentBlock = textCitation.isContentBlockLocation();
+		return contentBlock ? fromContentBlockLocation(textCitation.asContentBlockLocation()) : null;
 	}
 
-	private static ContentBlock cacheAwareContentBlock(ContentBlock contentBlock, MessageType messageType,
-			CacheEligibilityResolver cacheEligibilityResolver, @Nullable String basisForLength) {
-		ChatCompletionRequest.CacheControl cacheControl = cacheEligibilityResolver.resolve(messageType, basisForLength);
-		if (cacheControl == null) {
-			return contentBlock;
+	/** Maps a streamed citation delta to a {@link Citation}; unmatched location types give null. */
+	private @Nullable Citation convertStreamingCitation(CitationsDelta.Citation citation) {
+		if (citation.isCharLocation()) {
+			return fromCharLocation(citation.asCharLocation());
 		}
-		cacheEligibilityResolver.useCacheBlock();
-		return ContentBlock.from(contentBlock).cacheControl(cacheControl).build();
+		if (citation.isPageLocation()) {
+			return fromPageLocation(citation.asPageLocation());
+		}
+		// Content-block location is the last variant handled here; anything else yields null
+		boolean contentBlock = citation.isContentBlockLocation();
+		return contentBlock ? fromContentBlockLocation(citation.asContentBlockLocation()) : null;
+	}
+
+	private Citation fromCharLocation(CitationCharLocation loc) { // SDK char-location citation -> Citation
+		return Citation.ofCharLocation(loc.citedText(), (int) loc.documentIndex(), loc.documentTitle().orElse(null),
+				(int) loc.startCharIndex(), (int) loc.endCharIndex()); // plain int casts, no overflow check
 	}
 
-	private List<AnthropicApi.Tool> getFunctionTools(List<ToolDefinition> toolDefinitions) {
-		return toolDefinitions.stream().map(toolDefinition -> {
-			var name = toolDefinition.name();
-			var description = toolDefinition.description();
-			String inputSchema = toolDefinition.inputSchema();
-			return new AnthropicApi.Tool(name, description, JsonParser.fromJson(inputSchema, new TypeReference<>() {
-			}));
-		}).toList();
+	private Citation fromPageLocation(CitationPageLocation loc) { // SDK page-location citation -> Citation
+		return Citation.ofPageLocation(loc.citedText(), (int) loc.documentIndex(), loc.documentTitle().orElse(null),
+				(int) loc.startPageNumber(), (int) loc.endPageNumber()); // plain int casts, no overflow check
+	}
+
+	private Citation fromContentBlockLocation(CitationContentBlockLocation loc) { // block-location -> Citation
+		return Citation.ofContentBlockLocation(loc.citedText(), (int) loc.documentIndex(),
+				loc.documentTitle().orElse(null), (int) loc.startBlockIndex(), (int) loc.endBlockIndex()); // int casts
 	}
 
 	/**
-	 * Build messages strategically, applying cache control only where specified by the
-	 * strategy.
+	 * Serializes a {@link JsonValue} as JSON text. {@code JsonValue.toString()} cannot be
+	 * used because it prints Java Map syntax ({@code {key=value}}), so the value is first
+	 * unwrapped into plain Java objects and then written with Jackson.
+	 * @param jsonValue the SDK's JsonValue to convert
+	 * @return a valid JSON string
+	 * @throws RuntimeException if serialization fails
 	 */
-	private List<AnthropicMessage> buildMessages(Prompt prompt, CacheEligibilityResolver cacheEligibilityResolver) {
+	private String convertJsonValueToString(JsonValue jsonValue) {
+		try {
+			// Unwrap first; a freshly built Jackson mapper then serializes the plain object graph
+			Object plainValue = convertJsonValueToNative(jsonValue);
+			var mapper = tools.jackson.databind.json.JsonMapper.builder().build();
+			return mapper.writeValueAsString(plainValue);
+		}
+		catch (Exception ex) {
+			throw new RuntimeException("Failed to convert JsonValue to string", ex);
+		}
+	}
 
-		List<Message> allMessages = prompt.getInstructions()
-			.stream()
-			.filter(message -> message.getMessageType() != MessageType.SYSTEM)
-			.toList();
+	/**
+	 * Converts a {@link JsonValue} to a native Java object (null, Boolean, Number,
+	 * String, List, or Map) using the SDK's visitor interface.
+	 * @param jsonValue the SDK's JsonValue to convert
+	 * @return the equivalent native Java object, or null for JSON null
+	 */
+	private @Nullable Object convertJsonValueToNative(JsonValue jsonValue) {
+		return jsonValue.accept(new JsonValue.Visitor<@Nullable Object>() {
+			@Override
+			public @Nullable Object visitNull() {
+				return null;
+			}
 
-		// Find the last user message (current question) for CONVERSATION_HISTORY strategy
-		int lastUserIndex = -1;
-		if (cacheEligibilityResolver.isCachingEnabled()) {
-			for (int i = allMessages.size() - 1; i >= 0; i--) {
-				if (allMessages.get(i).getMessageType() == MessageType.USER) {
-					lastUserIndex = i;
-					break;
-				}
+			@Override
+			public @Nullable Object visitMissing() {
+				return null;
 			}
-		}
 
-		// Get citation documents from options
-		List<CitationDocument> citationDocuments = null;
-		if (prompt.getOptions() instanceof AnthropicChatOptions anthropicOptions) {
-			citationDocuments = anthropicOptions.getCitationDocuments();
-		}
+			@Override
+			public Object visitBoolean(boolean value) {
+				return value;
+			}
 
-		List<AnthropicMessage> result = new ArrayList<>();
-		for (int i = 0; i < allMessages.size(); i++) {
-			Message message = allMessages.get(i);
-			MessageType messageType = message.getMessageType();
-			if (messageType == MessageType.USER) {
-				List<ContentBlock> contentBlocks = new ArrayList<>();
-				// Add citation documents to the FIRST user message only
-				if (i == 0 && citationDocuments != null && !citationDocuments.isEmpty()) {
-					for (CitationDocument doc : citationDocuments) {
-						contentBlocks.add(doc.toContentBlock());
-					}
-				}
-				String content = message.getText();
-				// For conversation history caching, apply cache control to the
-				// last user message to cache the entire conversation up to that point.
-				boolean isLastUserMessage = (lastUserIndex >= 0) && (i == lastUserIndex);
-				ContentBlock contentBlock = new ContentBlock(content);
-				if (isLastUserMessage && cacheEligibilityResolver.isCachingEnabled()) {
-					// Combine text from all messages (user, assistant, tool) up to and
-					// including the last user message as the basis for cache eligibility
-					// checks
-					String combinedMessagesText = combineEligibleMessagesText(allMessages, lastUserIndex);
-					contentBlocks.add(cacheAwareContentBlock(contentBlock, messageType, cacheEligibilityResolver,
-							combinedMessagesText));
-				}
-				else {
-					contentBlocks.add(contentBlock);
-				}
-				if (message instanceof UserMessage userMessage) {
-					if (!CollectionUtils.isEmpty(userMessage.getMedia())) {
-						List<ContentBlock> mediaContent = userMessage.getMedia().stream().map(media -> {
-							Type contentBlockType = getContentBlockTypeByMedia(media);
-							var source = getSourceByMedia(media);
-							return new ContentBlock(contentBlockType, source);
-						}).toList();
-						contentBlocks.addAll(mediaContent);
-					}
-				}
-				result.add(new AnthropicMessage(contentBlocks, Role.valueOf(message.getMessageType().name())));
+			@Override
+			public Object visitNumber(Number value) {
+				return value;
 			}
-			else if (messageType == MessageType.ASSISTANT) {
-				AssistantMessage assistantMessage = (AssistantMessage) message;
-				List<ContentBlock> contentBlocks = new ArrayList<>();
-				if (StringUtils.hasText(message.getText())) {
-					ContentBlock contentBlock = new ContentBlock(message.getText());
-					contentBlocks.add(cacheAwareContentBlock(contentBlock, messageType, cacheEligibilityResolver));
-				}
-				if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) {
-					for (AssistantMessage.ToolCall toolCall : assistantMessage.getToolCalls()) {
-						ContentBlock contentBlock = new ContentBlock(Type.TOOL_USE, toolCall.id(), toolCall.name(),
-								ModelOptionsUtils.jsonToMap(toolCall.arguments()));
-						contentBlocks.add(cacheAwareContentBlock(contentBlock, messageType, cacheEligibilityResolver));
-					}
-				}
-				result.add(new AnthropicMessage(contentBlocks, Role.ASSISTANT));
+
+			@Override
+			public Object visitString(String value) {
+				return value;
 			}
-			else if (messageType == MessageType.TOOL) {
-				List<ContentBlock> toolResponses = ((ToolResponseMessage) message).getResponses()
-					.stream()
-					.map(toolResponse -> new ContentBlock(Type.TOOL_RESULT, toolResponse.id(),
-							toolResponse.responseData()))
-					.map(contentBlock -> cacheAwareContentBlock(contentBlock, messageType, cacheEligibilityResolver))
-					.toList();
-				result.add(new AnthropicMessage(toolResponses, Role.USER));
+
+			@Override
+			public Object visitArray(List<? extends JsonValue> values) {
+				return values.stream().map(v -> convertJsonValueToNative(v)).toList();
 			}
-			else {
-				throw new IllegalArgumentException("Unsupported message type: " + message.getMessageType());
+
+			@Override
+			public Object visitObject(java.util.Map<String, ? extends JsonValue> values) {
+				java.util.Map<String, Object> result = new java.util.LinkedHashMap<>();
+				for (java.util.Map.Entry<String, ? extends JsonValue> entry : values.entrySet()) {
+					result.put(entry.getKey(), convertJsonValueToNative(entry.getValue()));
+				}
+				return result;
 			}
-		}
-		return result;
+		});
 	}
 
-	private String combineEligibleMessagesText(List<Message> allMessages, int lastUserIndex) {
-		// Only 20 content blocks are considered by anthropic, so limit the number of
-		// message content to consider. We include all message types (user, assistant,
-		// tool)
-		// up to and including the last user message for aggregate eligibility checking.
-		int startIndex = Math.max(0, lastUserIndex - 19);
-		int endIndex = Math.min(allMessages.size(), lastUserIndex + 1);
-		StringBuilder sb = new StringBuilder();
-		for (int i = startIndex; i < endIndex; i++) {
-			Message message = allMessages.get(i);
-			String text = message.getText();
-			if (StringUtils.hasText(text)) {
-				sb.append(text);
+	/**
+	 * Builds a {@link ToolUseBlockParam.Input} from a JSON arguments string.
+	 * <p>
+	 * When rebuilding conversation history, we need to include the tool call arguments
+	 * that were originally sent by the model. This method parses the JSON arguments
+	 * string and creates the proper SDK input format.
+	 * @param argumentsJson the JSON string containing tool call arguments
+	 * @return a ToolUseBlockParam.Input with the parsed arguments
+	 */
+	private ToolUseBlockParam.Input buildToolInput(String argumentsJson) {
+		ToolUseBlockParam.Input.Builder inputBuilder = ToolUseBlockParam.Input.builder();
+		if (argumentsJson != null && !argumentsJson.isEmpty()) {
+			try {
+				var jsonMapper = tools.jackson.databind.json.JsonMapper.builder().build();
+				java.util.Map<String, Object> arguments = jsonMapper.readValue(argumentsJson,
+						new tools.jackson.core.type.TypeReference<java.util.Map<String, Object>>() {
+						});
+				for (java.util.Map.Entry<String, Object> entry : arguments.entrySet()) {
+					inputBuilder.putAdditionalProperty(entry.getKey(), JsonValue.from(entry.getValue()));
+				}
+			}
+			catch (Exception e) {
+				logger.warn("Failed to parse tool arguments JSON: {}", argumentsJson, e);
 			}
 		}
-		return sb.toString();
+		return inputBuilder.build();
 	}
 
 	/**
-	 * Build system content - as array if caching, string otherwise.
+	 * Converts a Spring AI {@link ToolDefinition} to an Anthropic SDK {@link Tool}.
+	 * <p>
+	 * Spring AI provides the input schema as a JSON string, but the SDK expects a
+	 * structured {@code Tool.InputSchema} built via the builder pattern.
+	 * <p>
+	 * Conversion: parses the JSON schema to a Map, extracts "properties" (added via
+	 * {@code putAdditionalProperty()}), extracts "required" fields (added via
+	 * {@code addRequired()}), then builds the Tool with name, description, and schema.
+	 * @param toolDefinition the tool definition with name, description, and JSON schema
+	 * @return the Anthropic SDK Tool
+	 * @throws RuntimeException if the JSON schema cannot be parsed
 	 */
-	private @Nullable Object buildSystemContent(Prompt prompt, CacheEligibilityResolver cacheEligibilityResolver) {
+	@SuppressWarnings("unchecked")
+	private Tool toAnthropicTool(ToolDefinition toolDefinition) {
+		try {
+			// Parse the JSON schema string into a Map
+			var jsonMapper = tools.jackson.databind.json.JsonMapper.builder().build();
+			java.util.Map<String, Object> schemaMap = jsonMapper.readValue(toolDefinition.inputSchema(),
+					new tools.jackson.core.type.TypeReference<java.util.Map<String, Object>>() {
+					});
+
+			// Build properties via putAdditionalProperty (SDK requires structured input)
+			Tool.InputSchema.Properties.Builder propertiesBuilder = Tool.InputSchema.Properties.builder();
+			Object propertiesObj = schemaMap.get("properties");
+			if (propertiesObj instanceof java.util.Map) {
+				java.util.Map<String, Object> properties = (java.util.Map<String, Object>) propertiesObj;
+				for (java.util.Map.Entry<String, Object> entry : properties.entrySet()) {
+					propertiesBuilder.putAdditionalProperty(entry.getKey(), JsonValue.from(entry.getValue()));
+				}
+			}
 
-		String systemText = prompt.getInstructions()
-			.stream()
-			.filter(m -> m.getMessageType() == MessageType.SYSTEM)
-			.map(Message::getText)
-			.collect(Collectors.joining(System.lineSeparator()));
+			Tool.InputSchema.Builder inputSchemaBuilder = Tool.InputSchema.builder()
+				.properties(propertiesBuilder.build());
 
-		if (!StringUtils.hasText(systemText)) {
-			return null;
+			// Add required fields if present
+			Object requiredObj = schemaMap.get("required");
+			if (requiredObj instanceof java.util.List) {
+				java.util.List<String> required = (java.util.List<String>) requiredObj;
+				for (String req : required) {
+					inputSchemaBuilder.addRequired(req);
+				}
+			}
+
+			return Tool.builder()
+				.name(toolDefinition.name())
+				.description(toolDefinition.description())
+				.inputSchema(inputSchemaBuilder.build())
+				.build();
 		}
+		catch (Exception e) {
+			throw new RuntimeException("Failed to parse tool input schema: " + toolDefinition.inputSchema(), e);
+		}
+	}
+
+	/**
+	 * Converts a Spring AI {@link Media} object to an Anthropic SDK
+	 * {@link ContentBlockParam}. Supports images (PNG, JPEG, GIF, WebP) and PDF
+	 * documents. Data can be provided as byte[] (base64 encoded) or HTTPS URL string.
+	 * @param media the media object containing MIME type and data
+	 * @return the appropriate ContentBlockParam (ImageBlockParam or DocumentBlockParam)
+	 * @throws IllegalArgumentException if the media type is unsupported
+	 */
+	private ContentBlockParam getContentBlockParamByMedia(Media media) {
+		MimeType mimeType = media.getMimeType();
+		String data = fromMediaData(media.getData());
 
-		// Use array format when caching system
-		if (cacheEligibilityResolver.isCachingEnabled()) {
-			return List
-				.of(cacheAwareContentBlock(new ContentBlock(systemText), MessageType.SYSTEM, cacheEligibilityResolver));
+		if (isImageMedia(mimeType)) {
+			return createImageBlockParam(mimeType, data);
+		}
+		else if (isPdfMedia(mimeType)) {
+			return createDocumentBlockParam(data);
 		}
+		throw new IllegalArgumentException("Unsupported media type: " + mimeType
+				+ ". Supported types are: images (image/*) and PDF documents (application/pdf)");
+	}
 
-		// Use string format when not caching (backward compatible)
-		return systemText;
+	/**
+	 * Checks whether the given MIME type is an image, looking only at its primary type.
+	 * @param mimeType the MIME type to check
+	 * @return true if the primary type is {@code image}; the subtype is not inspected
+	 */
+	private boolean isImageMedia(MimeType mimeType) {
+		return "image".equals(mimeType.getType());
+	}
+
+	/**
+	 * Checks whether the given MIME type is exactly {@code application/pdf}.
+	 * @param mimeType the MIME type to check
+	 * @return true only if the type is {@code application} and the subtype is {@code pdf}
+	 */
+	private boolean isPdfMedia(MimeType mimeType) {
+		return "application".equals(mimeType.getType()) && "pdf".equals(mimeType.getSubtype());
 	}
 
 	/**
-	 * Add cache control to the last tool for deterministic caching.
+	 * Extracts media data as a string. Converts byte[] to base64, passes through URL
+	 * strings.
+	 * @param mediaData the media data (byte[] or String)
+	 * @return base64-encoded string or URL string
+	 * @throws IllegalArgumentException if data type is unsupported
 	 */
-	private List<AnthropicApi.Tool> addCacheToLastTool(List<AnthropicApi.Tool> tools,
-			CacheEligibilityResolver cacheEligibilityResolver) {
+	private String fromMediaData(Object mediaData) {
+		if (mediaData instanceof String text) {
+			return text;
+		}
+		if (mediaData instanceof byte[] bytes) {
+			return Base64.getEncoder().encodeToString(bytes);
+		}
+		String type = mediaData.getClass().getSimpleName();
+		throw new IllegalArgumentException("Unsupported media data type: " + type + ". Expected byte[] or String.");
+	}
 
-		ChatCompletionRequest.CacheControl cacheControl = cacheEligibilityResolver.resolveToolCacheControl();
+	/**
+	 * Wraps image data in an {@link ImageBlockParam}, choosing a URL source when the data
+	 * starts with {@code https://} and an inline base64 source otherwise.
+	 * @param mimeType the image MIME type; only consulted for the base64 source
+	 * @param data either an {@code https://} URL or base64-encoded image bytes
+	 * @return the image block as a {@link ContentBlockParam}
+	 */
+	private ContentBlockParam createImageBlockParam(MimeType mimeType, String data) {
+		boolean remote = data.startsWith("https://");
+		// The media type is resolved lazily so URL images never hit the subtype validation.
+		ImageBlockParam.Source source = remote
+				? ImageBlockParam.Source.ofUrl(UrlImageSource.builder().url(data).build())
+				: ImageBlockParam.Source.ofBase64(
+						Base64ImageSource.builder().data(data).mediaType(toSdkImageMediaType(mimeType)).build());
+		ImageBlockParam image = ImageBlockParam.builder().source(source).build();
+		return ContentBlockParam.ofImage(image);
+	}
 
-		if (cacheControl == null || tools == null || tools.isEmpty()) {
-			return tools;
+	/**
+	 * Wraps PDF data in a {@link DocumentBlockParam}, by URL or as inline base64.
+	 * @param data either an {@code https://} URL or base64-encoded PDF bytes
+	 * @return the document block as a {@link ContentBlockParam}
+	 */
+	private ContentBlockParam createDocumentBlockParam(String data) {
+		var document = DocumentBlockParam.builder();
+		if (!data.startsWith("https://")) {
+			document.source(DocumentBlockParam.Source.ofBase64(Base64PdfSource.builder().data(data).build()));
+		}
+		else {
+			document.source(DocumentBlockParam.Source.ofUrl(UrlPdfSource.builder().url(data).build()));
 		}
+		return ContentBlockParam.ofDocument(document.build());
+	}
 
-		List<AnthropicApi.Tool> modifiedTools = new ArrayList<>();
-		for (int i = 0; i < tools.size(); i++) {
-			AnthropicApi.Tool tool = tools.get(i);
-			if (i == tools.size() - 1) {
-				// Add cache control to last tool
-				tool = new AnthropicApi.Tool(tool.type(), tool.name(), tool.description(), tool.inputSchema(),
-						cacheControl);
-				cacheEligibilityResolver.useCacheBlock();
-			}
-			modifiedTools.add(tool);
+	/**
+	 * Maps the subtype of a Spring {@link MimeType} onto {@link Base64ImageSource.MediaType}.
+	 * @param mimeType the image MIME type whose subtype is inspected
+	 * @return the matching SDK media type constant
+	 * @throws IllegalArgumentException if the subtype is not png, jpeg/jpg, gif or webp
+	 */
+	private Base64ImageSource.MediaType toSdkImageMediaType(MimeType mimeType) {
+		return switch (mimeType.getSubtype()) {
+			case "gif" -> Base64ImageSource.MediaType.IMAGE_GIF;
+			case "jpg", "jpeg" -> Base64ImageSource.MediaType.IMAGE_JPEG;
+			case "png" -> Base64ImageSource.MediaType.IMAGE_PNG;
+			case "webp" -> Base64ImageSource.MediaType.IMAGE_WEBP;
+			// No fallback: an unknown subtype is rejected instead of being mislabelled.
+			default -> throw new IllegalArgumentException("Unsupported image type: " + mimeType
+					+ ". Supported types: image/png, image/jpeg, image/gif, image/webp");
+		};
+	}
+
+	/**
+	 * Returns a copy of the given {@link ToolChoice} with {@code disableParallelToolUse} set to
+	 * {@code true} for the auto, any and tool variants; any other variant is returned as is.
+	 */
+	private ToolChoice applyDisableParallelToolUse(ToolChoice toolChoice) {
+		if (toolChoice.isTool()) {
+			return ToolChoice.ofTool(toolChoice.asTool().toBuilder().disableParallelToolUse(true).build());
+		}
+		if (toolChoice.isAny()) {
+			return ToolChoice.ofAny(toolChoice.asAny().toBuilder().disableParallelToolUse(true).build());
+		}
+		if (toolChoice.isAuto()) {
+			return ToolChoice.ofAuto(toolChoice.asAuto().toBuilder().disableParallelToolUse(true).build());
 		}
-		return modifiedTools;
+		return toolChoice;
 	}
 
 	@Override
 	public ChatOptions getDefaultOptions() {
-		return AnthropicChatOptions.fromOptions(this.defaultOptions);
+		return this.options.copy();
 	}
 
 	/**
-	 * Use the provided convention for reporting observation data
-	 * @param observationConvention The provided convention
+	 * Use the provided convention for reporting observation data.
+	 * @param observationConvention the provided convention
 	 */
 	public void setObservationConvention(ChatModelObservationConvention observationConvention) {
 		Assert.notNull(observationConvention, "observationConvention cannot be null");
 		this.observationConvention = observationConvention;
 	}
 
-	public static Builder builder() {
-		return new Builder();
-	}
+	/**
+	 * Holds state accumulated during streaming for building complete responses. This
+	 * includes message metadata (ID, model, input tokens) and tool call accumulation
+	 * state for streaming tool calling support.
+	 */
+	private static class StreamingState {
 
-	public static final class Builder {
+		private final AtomicReference<String> messageId = new AtomicReference<>();
 
-		private @Nullable AnthropicApi anthropicApi;
+		private final AtomicReference<String> model = new AtomicReference<>();
 
-		private AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(DEFAULT_MODEL_NAME)
-			.maxTokens(DEFAULT_MAX_TOKENS)
-			.build();
+		private final AtomicReference<Long> inputTokens = new AtomicReference<>(0L);
 
-		private RetryTemplate retryTemplate = RetryUtils.DEFAULT_RETRY_TEMPLATE;
+		// Tool calling state - tracks the current tool being streamed
+		private final AtomicReference<String> currentToolId = new AtomicReference<>("");
 
-		private @Nullable ToolCallingManager toolCallingManager;
+		private final AtomicReference<String> currentToolName = new AtomicReference<>("");
 
-		private ObservationRegistry observationRegistry = ObservationRegistry.NOOP;
+		private final StringBuilder currentToolJsonAccumulator = new StringBuilder();
 
-		private ToolExecutionEligibilityPredicate toolExecutionEligibilityPredicate = new DefaultToolExecutionEligibilityPredicate();
+		private final List<ToolCall> completedToolCalls = new ArrayList<>();
 
-		private Builder() {
+		private final List<Citation> accumulatedCitations = new ArrayList<>();
+
+		void setMessageInfo(String id, String modelName, long tokens) {
+			this.messageId.set(id);
+			this.model.set(modelName);
+			this.inputTokens.set(tokens);
 		}
 
-		public Builder anthropicApi(AnthropicApi anthropicApi) {
-			this.anthropicApi = anthropicApi;
-			return this;
+		String getMessageId() {
+			return this.messageId.get();
 		}
 
-		public Builder defaultOptions(AnthropicChatOptions defaultOptions) {
-			this.defaultOptions = defaultOptions;
-			return this;
+		String getModel() {
+			return this.model.get();
 		}
 
-		public Builder retryTemplate(RetryTemplate retryTemplate) {
-			this.retryTemplate = retryTemplate;
-			return this;
+		long getInputTokens() {
+			return this.inputTokens.get();
 		}
 
-		public Builder toolCallingManager(ToolCallingManager toolCallingManager) {
-			this.toolCallingManager = toolCallingManager;
-			return this;
+		/**
+		 * Starts tracking a new tool use block.
+		 * @param toolId the tool call ID
+		 * @param toolName the tool name
+		 */
+		void startToolUse(String toolId, String toolName) {
+			this.currentToolId.set(toolId);
+			this.currentToolName.set(toolName);
+			this.currentToolJsonAccumulator.setLength(0);
 		}
 
-		public Builder toolExecutionEligibilityPredicate(
-				ToolExecutionEligibilityPredicate toolExecutionEligibilityPredicate) {
-			this.toolExecutionEligibilityPredicate = toolExecutionEligibilityPredicate;
-			return this;
+		/**
+		 * Appends partial JSON to the current tool's input accumulator.
+		 * @param partialJson the partial JSON string
+		 */
+		void appendToolJson(String partialJson) {
+			this.currentToolJsonAccumulator.append(partialJson);
 		}
 
-		public Builder observationRegistry(ObservationRegistry observationRegistry) {
-			this.observationRegistry = observationRegistry;
-			return this;
+		/**
+		 * Finalizes the current tool use block and adds it to completed tool calls.
+		 */
+		void finishToolUse() {
+			String id = this.currentToolId.get();
+			String name = this.currentToolName.get();
+			if (!id.isEmpty() && !name.isEmpty()) {
+				String arguments = this.currentToolJsonAccumulator.toString();
+				this.completedToolCalls.add(new ToolCall(id, "function", name, arguments));
+			}
+			// Reset current tool state (use empty string as "not tracking" sentinel)
+			this.currentToolId.set("");
+			this.currentToolName.set("");
+			this.currentToolJsonAccumulator.setLength(0);
 		}
 
-		public AnthropicChatModel build() {
-			Assert.state(this.anthropicApi != null, "AnthropicApi must not be null");
-			return new AnthropicChatModel(this.anthropicApi, this.defaultOptions,
-					Objects.requireNonNullElse(this.toolCallingManager, DEFAULT_TOOL_CALLING_MANAGER),
-					this.retryTemplate, this.observationRegistry, this.toolExecutionEligibilityPredicate);
+		/**
+		 * Returns true if currently tracking a tool use block.
+		 */
+		boolean isTrackingToolUse() {
+			return !this.currentToolId.get().isEmpty();
+		}
+
+		/**
+		 * Returns the list of completed tool calls accumulated during streaming.
+		 */
+		List<ToolCall> getCompletedToolCalls() {
+			return new ArrayList<>(this.completedToolCalls);
+		}
+
+		void addCitation(Citation citation) {
+			this.accumulatedCitations.add(citation);
+		}
+
+		List<Citation> getCitations() {
+			return new ArrayList<>(this.accumulatedCitations);
 		}
 
 	}
 
 	/**
-	 * Context object for tracking citations during response processing. Aggregates
-	 * citations from multiple content blocks in a single response.
+	 * Builder for creating {@link AnthropicChatModel} instances.
 	 */
-	class CitationContext {
+	public static final class Builder {
+
+		private @Nullable AnthropicClient anthropicClient;
+
+		private @Nullable AnthropicClientAsync anthropicClientAsync;
+
+		private @Nullable AnthropicChatOptions options;
+
+		private @Nullable ToolCallingManager toolCallingManager;
+
+		private @Nullable ObservationRegistry observationRegistry;
+
+		private @Nullable ToolExecutionEligibilityPredicate toolExecutionEligibilityPredicate;
+
+		private Builder() {
+		}
+
+		/**
+		 * Sets the synchronous Anthropic client.
+		 * @param anthropicClient the synchronous client
+		 * @return this builder
+		 */
+		public Builder anthropicClient(AnthropicClient anthropicClient) {
+			this.anthropicClient = anthropicClient;
+			return this;
+		}
+
+		/**
+		 * Sets the asynchronous Anthropic client.
+		 * @param anthropicClientAsync the asynchronous client
+		 * @return this builder
+		 */
+		public Builder anthropicClientAsync(AnthropicClientAsync anthropicClientAsync) {
+			this.anthropicClientAsync = anthropicClientAsync;
+			return this;
+		}
 
-		private final List<Citation> allCitations = new ArrayList<>();
+		/**
+		 * Sets the chat options.
+		 * @param options the chat options
+		 * @return this builder
+		 */
+		public Builder options(AnthropicChatOptions options) {
+			this.options = options;
+			return this;
+		}
 
-		public void addCitations(List<Citation> citations) {
-			this.allCitations.addAll(citations);
+		/**
+		 * Sets the tool calling manager.
+		 * @param toolCallingManager the tool calling manager
+		 * @return this builder
+		 */
+		public Builder toolCallingManager(ToolCallingManager toolCallingManager) {
+			this.toolCallingManager = toolCallingManager;
+			return this;
 		}
 
-		public boolean hasCitations() {
-			return !this.allCitations.isEmpty();
+		/**
+		 * Sets the observation registry for metrics and tracing.
+		 * @param observationRegistry the observation registry
+		 * @return this builder
+		 */
+		public Builder observationRegistry(ObservationRegistry observationRegistry) {
+			this.observationRegistry = observationRegistry;
+			return this;
 		}
 
-		public List<Citation> getAllCitations() {
-			return new ArrayList<>(this.allCitations);
+		/**
+		 * Sets the predicate to determine tool execution eligibility.
+		 * @param toolExecutionEligibilityPredicate the predicate
+		 * @return this builder
+		 */
+		public Builder toolExecutionEligibilityPredicate(
+				ToolExecutionEligibilityPredicate toolExecutionEligibilityPredicate) {
+			this.toolExecutionEligibilityPredicate = toolExecutionEligibilityPredicate;
+			return this;
 		}
 
-		public int getTotalCitationCount() {
-			return this.allCitations.size();
+		/**
+		 * Builds a new {@link AnthropicChatModel} instance.
+		 * @return the configured chat model
+		 */
+		public AnthropicChatModel build() {
+			return new AnthropicChatModel(this.anthropicClient, this.anthropicClientAsync, this.options,
+					this.toolCallingManager, this.observationRegistry, this.toolExecutionEligibilityPredicate);
 		}
 
 	}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java
index be81b345c6b..5f036227959 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java
@@ -16,28 +16,34 @@
 
 package org.springframework.ai.anthropic;
 
+import java.net.Proxy;
+import java.time.Duration;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 
+import com.anthropic.core.JsonValue;
+import com.anthropic.models.messages.JsonOutputFormat;
+import com.anthropic.models.messages.Metadata;
+import com.anthropic.models.messages.Model;
+import com.anthropic.models.messages.OutputConfig;
+import com.anthropic.models.messages.ThinkingConfigAdaptive;
+import com.anthropic.models.messages.ThinkingConfigDisabled;
+import com.anthropic.models.messages.ThinkingConfigEnabled;
+import com.anthropic.models.messages.ThinkingConfigParam;
+import com.anthropic.models.messages.ToolChoice;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonInclude.Include;
-import com.fasterxml.jackson.annotation.JsonProperty;
 import org.jspecify.annotations.Nullable;
+import tools.jackson.core.type.TypeReference;
+import tools.jackson.databind.json.JsonMapper;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.OutputFormat;
-import org.springframework.ai.anthropic.api.AnthropicCacheOptions;
-import org.springframework.ai.anthropic.api.CitationDocument;
 import org.springframework.ai.chat.prompt.ChatOptions;
-import org.springframework.ai.model.ModelOptionsUtils;
 import org.springframework.ai.model.tool.DefaultToolCallingChatOptions;
 import org.springframework.ai.model.tool.StructuredOutputChatOptions;
 import org.springframework.ai.model.tool.ToolCallingChatOptions;
@@ -45,7 +51,13 @@
 import org.springframework.util.Assert;
 
 /**
- * The options to be used when sending a chat request to the Anthropic API.
+ * Chat options for {@link AnthropicChatModel}. Supports model selection, sampling
+ * parameters (temperature, topP, topK), output control (maxTokens, stopSequences), and
+ * tool calling configuration.
+ *
+ * <p>
+ * Options can be set as defaults during model construction or overridden per-request via
+ * the {@link org.springframework.ai.chat.prompt.Prompt}.
  *
  * @author Christian Tzolov
  * @author Thomas Vitale
@@ -54,149 +66,150 @@
  * @author Soby Chacko
  * @author Austin Dase
  * @since 1.0.0
+ * @see AnthropicChatModel
+ * @see <a href="https://docs.anthropic.com/en/api/messages">Anthropic Messages API</a>
  */
 @JsonInclude(Include.NON_NULL)
-public class AnthropicChatOptions implements ToolCallingChatOptions, StructuredOutputChatOptions {
-
-	// @formatter:off
-	@SuppressWarnings("NullAway.Init")
-	private @JsonProperty("model") String model;
-	@SuppressWarnings("NullAway.Init")
-	private @JsonProperty("max_tokens") Integer maxTokens;
-	private @JsonProperty("metadata") ChatCompletionRequest.@Nullable Metadata metadata;
-	private @JsonProperty("stop_sequences") @Nullable List<String> stopSequences;
-	private @JsonProperty("temperature") @Nullable Double temperature;
-	private @JsonProperty("top_p") @Nullable Double topP;
-	private @JsonProperty("top_k") @Nullable Integer topK;
-	private @JsonProperty("tool_choice") AnthropicApi.@Nullable ToolChoice toolChoice;
-	private @JsonProperty("thinking") ChatCompletionRequest.@Nullable ThinkingConfig thinking;
+public class AnthropicChatOptions extends AbstractAnthropicOptions
+		implements ToolCallingChatOptions, StructuredOutputChatOptions {
 
 	/**
-	 * Documents to be used for citation-based responses. These documents will be
-	 * converted to ContentBlocks and included in the first user message of the request.
-	 * Citations indicating which parts of these documents were used in the response will
-	 * be returned in the response metadata under the "citations" key.
-	 * @see CitationDocument
-	 * @see Citation
+	 * Default model to use for chat completions.
 	 */
-	@JsonIgnore
-	private List<CitationDocument> citationDocuments = new ArrayList<>();
+	public static final String DEFAULT_MODEL = Model.CLAUDE_HAIKU_4_5.asString();
 
-	@JsonIgnore
-	private AnthropicCacheOptions cacheOptions = AnthropicCacheOptions.DISABLED;
+	/**
+	 * Default max tokens for chat completions.
+	 */
+	public static final Integer DEFAULT_MAX_TOKENS = 4096;
 
-	public AnthropicCacheOptions getCacheOptions() {
-		return this.cacheOptions;
-	}
+	/**
+	 * Maximum number of tokens to generate in the response.
+	 */
+	private @Nullable Integer maxTokens;
 
-	public void setCacheOptions(AnthropicCacheOptions cacheOptions) {
-		this.cacheOptions = cacheOptions;
-	}
+	/**
+	 * Request metadata containing user ID for abuse detection.
+	 */
+	private @Nullable Metadata metadata;
 
 	/**
-	 * Container for Claude Skills to make available in this request.
-	 * Skills are collections of instructions, scripts, and resources that
-	 * extend Claude's capabilities for specific domains.
-	 * Maximum of 8 skills per request.
+	 * Sequences that will cause the model to stop generating.
 	 */
-	@JsonIgnore
-	private AnthropicApi.@Nullable SkillContainer skillContainer;
+	private @Nullable List<String> stopSequences;
 
-	public AnthropicApi.@Nullable SkillContainer getSkillContainer() {
-		return this.skillContainer;
-	}
+	/**
+	 * Sampling temperature between 0 and 1. Higher values make output more random.
+	 */
+	private @Nullable Double temperature;
 
-	public void setSkillContainer(AnthropicApi.@Nullable SkillContainer skillContainer) {
-		this.skillContainer = skillContainer;
-	}
+	/**
+	 * Nucleus sampling parameter. The model considers tokens with top_p probability mass.
+	 */
+	private @Nullable Double topP;
 
 	/**
-	 * Collection of {@link ToolCallback}s to be used for tool calling in the chat
-	 * completion requests.
+	 * Only sample from the top K options for each subsequent token.
+	 */
+	private @Nullable Integer topK;
+
+	/**
+	 * Tool choice configuration for controlling tool usage behavior.
+	 */
+	private @Nullable ToolChoice toolChoice;
+
+	/**
+	 * Extended thinking configuration for Claude's reasoning capabilities.
+	 */
+	private @Nullable ThinkingConfigParam thinking;
+
+	/**
+	 * Whether to disable parallel tool use. When true, the model will use at most one
+	 * tool per response.
+	 */
+	private @Nullable Boolean disableParallelToolUse;
+
+	/**
+	 * Collection of tool callbacks for tool calling.
 	 */
 	@JsonIgnore
 	private List<ToolCallback> toolCallbacks = new ArrayList<>();
 
 	/**
-	 * Collection of tool names to be resolved at runtime and used for tool calling in the
-	 * chat completion requests.
+	 * Collection of tool names to be resolved at runtime.
 	 */
 	@JsonIgnore
-	private Set<String> toolNames = new HashSet<>();
+	private Set<String> toolNames = new java.util.HashSet<>();
 
 	/**
-	 * Whether to enable the tool execution lifecycle internally in ChatModel.
+	 * Whether to enable internal tool execution in the chat model.
 	 */
 	@JsonIgnore
 	private @Nullable Boolean internalToolExecutionEnabled;
 
+	/**
+	 * Context to be passed to tools during execution.
+	 */
 	@JsonIgnore
 	private Map<String, Object> toolContext = new HashMap<>();
 
-
 	/**
-	 * Optional HTTP headers to be added to the chat completion request.
+	 * Citation documents to include in the request for citation-enabled responses.
 	 */
 	@JsonIgnore
-	private Map<String, String> httpHeaders = new HashMap<>();
+	private List<AnthropicCitationDocument> citationDocuments = new ArrayList<>();
 
 	/**
-	 * The desired response format for structured output.
+	 * Cache options for configuring prompt caching behavior.
 	 */
-	private @JsonProperty("output_format") @Nullable OutputFormat outputFormat;
+	@JsonIgnore
+	private AnthropicCacheOptions cacheOptions = AnthropicCacheOptions.disabled();
 
-	// @formatter:on
+	/**
+	 * Output configuration for controlling response format and effort level. Includes
+	 * structured output (JSON schema) and effort control (LOW, MEDIUM, HIGH, MAX).
+	 */
+	@JsonIgnore
+	private @Nullable OutputConfig outputConfig;
 
-	public static AnthropicChatOptions.Builder<?> builder() {
-		return new Builder<>();
-	}
+	/**
+	 * Per-request HTTP headers to include in the API call. Merged with model-level
+	 * defaults (runtime headers take precedence). Used for beta feature headers, custom
+	 * tracking, etc.
+	 */
+	@JsonIgnore
+	private Map<String, String> httpHeaders = new HashMap<>();
 
-	public static AnthropicChatOptions fromOptions(AnthropicChatOptions fromOptions) {
-		return builder().model(fromOptions.getModel())
-			.maxTokens(fromOptions.getMaxTokens())
-			.metadata(fromOptions.getMetadata())
-			.stopSequences(
-					fromOptions.getStopSequences() != null ? new ArrayList<>(fromOptions.getStopSequences()) : null)
-			.temperature(fromOptions.getTemperature())
-			.topP(fromOptions.getTopP())
-			.topK(fromOptions.getTopK())
-			.toolChoice(fromOptions.getToolChoice())
-			.thinking(fromOptions.getThinking())
-			.toolCallbacks(new ArrayList<>(fromOptions.getToolCallbacks()))
-			.toolNames(new HashSet<>(fromOptions.getToolNames()))
-			.internalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled())
-			.toolContext(new HashMap<>(fromOptions.getToolContext()))
-			.httpHeaders(new HashMap<>(fromOptions.getHttpHeaders()))
-			.cacheOptions(fromOptions.getCacheOptions())
-			.citationDocuments(new ArrayList<>(fromOptions.getCitationDocuments()))
-			.outputFormat(fromOptions.getOutputFormat())
-			.skillContainer(fromOptions.getSkillContainer())
-			.build();
-	}
+	/**
+	 * Skills container for configuring Claude Skills in the request.
+	 */
+	@JsonIgnore
+	private @Nullable AnthropicSkillContainer skillContainer;
 
-	@Override
-	public String getModel() {
-		return this.model;
-	}
+	private static final JsonMapper JSON_MAPPER = JsonMapper.builder().build();
 
-	public void setModel(String model) {
-		this.model = model;
+	/**
+	 * Creates a new builder for AnthropicChatOptions.
+	 * @return a new builder instance
+	 */
+	public static Builder<?> builder() {
+		return new Builder<>();
 	}
 
 	@Override
-	public Integer getMaxTokens() {
+	public @Nullable Integer getMaxTokens() {
 		return this.maxTokens;
 	}
 
-	public void setMaxTokens(Integer maxTokens) {
+	public void setMaxTokens(@Nullable Integer maxTokens) {
 		this.maxTokens = maxTokens;
 	}
 
-	public ChatCompletionRequest.@Nullable Metadata getMetadata() {
+	public @Nullable Metadata getMetadata() {
 		return this.metadata;
 	}
 
-	public void setMetadata(ChatCompletionRequest.@Nullable Metadata metadata) {
+	public void setMetadata(@Nullable Metadata metadata) {
 		this.metadata = metadata;
 	}
 
@@ -236,30 +249,36 @@ public void setTopK(@Nullable Integer topK) {
 		this.topK = topK;
 	}
 
-	public AnthropicApi.@Nullable ToolChoice getToolChoice() {
+	public @Nullable ToolChoice getToolChoice() {
 		return this.toolChoice;
 	}
 
-	public void setToolChoice(AnthropicApi.@Nullable ToolChoice toolChoice) {
+	public void setToolChoice(@Nullable ToolChoice toolChoice) {
 		this.toolChoice = toolChoice;
 	}
 
-	public ChatCompletionRequest.@Nullable ThinkingConfig getThinking() {
+	public @Nullable ThinkingConfigParam getThinking() {
 		return this.thinking;
 	}
 
-	public void setThinking(ChatCompletionRequest.@Nullable ThinkingConfig thinking) {
+	public void setThinking(@Nullable ThinkingConfigParam thinking) {
 		this.thinking = thinking;
 	}
 
+	public @Nullable Boolean getDisableParallelToolUse() {
+		return this.disableParallelToolUse;
+	}
+
+	public void setDisableParallelToolUse(@Nullable Boolean disableParallelToolUse) {
+		this.disableParallelToolUse = disableParallelToolUse;
+	}
+
 	@Override
-	@JsonIgnore
 	public List<ToolCallback> getToolCallbacks() {
 		return this.toolCallbacks;
 	}
 
 	@Override
-	@JsonIgnore
 	public void setToolCallbacks(List<ToolCallback> toolCallbacks) {
 		Assert.notNull(toolCallbacks, "toolCallbacks cannot be null");
 		Assert.noNullElements(toolCallbacks, "toolCallbacks cannot contain null elements");
@@ -267,13 +286,11 @@ public void setToolCallbacks(List<ToolCallback> toolCallbacks) {
 	}
 
 	@Override
-	@JsonIgnore
 	public Set<String> getToolNames() {
 		return this.toolNames;
 	}
 
 	@Override
-	@JsonIgnore
 	public void setToolNames(Set<String> toolNames) {
 		Assert.notNull(toolNames, "toolNames cannot be null");
 		Assert.noNullElements(toolNames, "toolNames cannot contain null elements");
@@ -282,56 +299,31 @@ public void setToolNames(Set<String> toolNames) {
 	}
 
 	@Override
-	@JsonIgnore
 	public @Nullable Boolean getInternalToolExecutionEnabled() {
 		return this.internalToolExecutionEnabled;
 	}
 
 	@Override
-	@JsonIgnore
 	public void setInternalToolExecutionEnabled(@Nullable Boolean internalToolExecutionEnabled) {
 		this.internalToolExecutionEnabled = internalToolExecutionEnabled;
 	}
 
 	@Override
-	@JsonIgnore
-	public @Nullable Double getFrequencyPenalty() {
-		return null;
-	}
-
-	@Override
-	@JsonIgnore
-	public @Nullable Double getPresencePenalty() {
-		return null;
-	}
-
-	@Override
-	@JsonIgnore
 	public Map<String, Object> getToolContext() {
 		return this.toolContext;
 	}
 
 	@Override
-	@JsonIgnore
 	public void setToolContext(Map<String, Object> toolContext) {
 		this.toolContext = toolContext;
 	}
 
-	@JsonIgnore
-	public Map<String, String> getHttpHeaders() {
-		return this.httpHeaders;
-	}
-
-	public void setHttpHeaders(Map<String, String> httpHeaders) {
-		this.httpHeaders = httpHeaders;
-	}
-
-	public List<CitationDocument> getCitationDocuments() {
+	public List<AnthropicCitationDocument> getCitationDocuments() {
 		return this.citationDocuments;
 	}
 
-	public void setCitationDocuments(List<CitationDocument> citationDocuments) {
-		Assert.notNull(citationDocuments, "Citation documents cannot be null");
+	public void setCitationDocuments(List<AnthropicCitationDocument> citationDocuments) {
+		Assert.notNull(citationDocuments, "citationDocuments cannot be null");
 		this.citationDocuments = citationDocuments;
 	}
 
@@ -344,7 +336,8 @@ public void validateCitationConsistency() {
 			return;
 		}
 
-		boolean hasEnabledCitations = this.citationDocuments.stream().anyMatch(CitationDocument::isCitationsEnabled);
+		boolean hasEnabledCitations = this.citationDocuments.stream()
+			.anyMatch(AnthropicCitationDocument::isCitationsEnabled);
 		boolean hasDisabledCitations = this.citationDocuments.stream().anyMatch(doc -> !doc.isCitationsEnabled());
 
 		if (hasEnabledCitations && hasDisabledCitations) {
@@ -354,38 +347,155 @@ public void validateCitationConsistency() {
 		}
 	}
 
-	public @Nullable OutputFormat getOutputFormat() {
-		return this.outputFormat;
+	public AnthropicCacheOptions getCacheOptions() {
+		return this.cacheOptions;
+	}
+
+	public void setCacheOptions(AnthropicCacheOptions cacheOptions) {
+		Assert.notNull(cacheOptions, "cacheOptions cannot be null");
+		this.cacheOptions = cacheOptions;
+	}
+
+	@JsonIgnore
+	public @Nullable OutputConfig getOutputConfig() {
+		return this.outputConfig;
+	}
+
+	public void setOutputConfig(@Nullable OutputConfig outputConfig) {
+		this.outputConfig = outputConfig;
 	}
 
-	public void setOutputFormat(OutputFormat outputFormat) {
-		Assert.notNull(outputFormat, "outputFormat cannot be null");
-		this.outputFormat = outputFormat;
+	@JsonIgnore
+	public Map<String, String> getHttpHeaders() {
+		return this.httpHeaders;
+	}
+
+	public void setHttpHeaders(Map<String, String> httpHeaders) {
+		this.httpHeaders = httpHeaders;
+	}
+
+	@JsonIgnore
+	public @Nullable AnthropicSkillContainer getSkillContainer() {
+		return this.skillContainer;
+	}
+
+	public void setSkillContainer(@Nullable AnthropicSkillContainer skillContainer) {
+		this.skillContainer = skillContainer;
 	}
 
 	@Override
 	@JsonIgnore
 	public @Nullable String getOutputSchema() {
-		return this.getOutputFormat() != null ? ModelOptionsUtils.toJsonString(this.getOutputFormat().schema()) : null;
+		if (this.outputConfig == null) {
+			return null;
+		}
+		return this.outputConfig.format().map(format -> {
+			Map<String, JsonValue> schemaProps = format.schema()._additionalProperties();
+			Map<String, Object> nativeMap = new LinkedHashMap<>();
+			for (Map.Entry<String, JsonValue> entry : schemaProps.entrySet()) {
+				nativeMap.put(entry.getKey(), convertJsonValueToNative(entry.getValue()));
+			}
+			return JSON_MAPPER.writeValueAsString(nativeMap);
+		}).orElse(null);
 	}
 
 	@Override
 	@JsonIgnore
-	public void setOutputSchema(String outputSchema) {
-		this.setOutputFormat(new OutputFormat(outputSchema));
+	public void setOutputSchema(@Nullable String outputSchema) {
+		if (outputSchema == null) {
+			this.outputConfig = null;
+			return;
+		}
+		Map<String, Object> schemaMap = JSON_MAPPER.readValue(outputSchema, new TypeReference<Map<String, Object>>() {
+		});
+		JsonOutputFormat.Schema.Builder schemaBuilder = JsonOutputFormat.Schema.builder();
+		for (Map.Entry<String, Object> entry : schemaMap.entrySet()) {
+			schemaBuilder.putAdditionalProperty(entry.getKey(), JsonValue.from(entry.getValue()));
+		}
+		JsonOutputFormat jsonOutputFormat = JsonOutputFormat.builder().schema(schemaBuilder.build()).build();
+		OutputConfig.Builder configBuilder = OutputConfig.builder().format(jsonOutputFormat);
+		if (this.outputConfig != null) {
+			this.outputConfig.effort().ifPresent(configBuilder::effort);
+		}
+		this.outputConfig = configBuilder.build();
+	}
+
+	/**
+	 * Converts a {@link JsonValue} to a native Java object using the visitor pattern.
+	 * Maps to null, Boolean, Number, String, List, or Map recursively.
+	 * @param jsonValue the SDK's JsonValue to convert
+	 * @return the equivalent native Java object, or null for JSON null
+	 */
+	private static @Nullable Object convertJsonValueToNative(JsonValue jsonValue) {
+		return jsonValue.accept(new JsonValue.Visitor<@Nullable Object>() {
+			@Override
+			public @Nullable Object visitNull() {
+				return null;
+			}
+
+			@Override
+			public @Nullable Object visitMissing() {
+				return null;
+			}
+
+			@Override
+			public Object visitBoolean(boolean value) {
+				return value;
+			}
+
+			@Override
+			public Object visitNumber(Number value) {
+				return value;
+			}
+
+			@Override
+			public Object visitString(String value) {
+				return value;
+			}
+
+			@Override
+			public Object visitArray(List<? extends JsonValue> values) {
+				return values.stream().map(v -> convertJsonValueToNative(v)).toList();
+			}
+
+			@Override
+			public Object visitObject(Map<String, ? extends JsonValue> values) {
+				Map<String, Object> result = new LinkedHashMap<>();
+				for (Map.Entry<String, ? extends JsonValue> entry : values.entrySet()) {
+					result.put(entry.getKey(), convertJsonValueToNative(entry.getValue()));
+				}
+				return result;
+			}
+		});
+	}
+
+	@Override
+	public @Nullable Double getFrequencyPenalty() {
+		return null;
+	}
+
+	@Override
+	public @Nullable Double getPresencePenalty() {
+		return null;
 	}
 
 	@Override
-	@SuppressWarnings("unchecked")
 	public AnthropicChatOptions copy() {
-		return fromOptions(this);
+		return mutate().build();
 	}
 
 	@Override
-	public AnthropicChatOptions.Builder<?> mutate() {
+	public Builder<?> mutate() {
 		return builder()
+			// AbstractAnthropicOptions
+			.model(this.getModel())
+			.baseUrl(this.getBaseUrl())
+			.apiKey(this.getApiKey())
+			.timeout(this.getTimeout())
+			.maxRetries(this.getMaxRetries())
+			.proxy(this.getProxy())
+			.customHeaders(this.getCustomHeaders())
 			// ChatOptions
-			.model(this.model)
 			.frequencyPenalty(this.getFrequencyPenalty())
 			.maxTokens(this.maxTokens)
 			.presencePenalty(this.getPresencePenalty())
@@ -398,16 +508,16 @@ public AnthropicChatOptions.Builder<?> mutate() {
 			.toolNames(this.getToolNames())
 			.toolContext(this.getToolContext())
 			.internalToolExecutionEnabled(this.getInternalToolExecutionEnabled())
-			// StructuredOutputChatOptions
-			.outputFormat(this.outputFormat)
 			// Anthropic Specific
 			.metadata(this.metadata)
 			.toolChoice(this.toolChoice)
 			.thinking(this.thinking)
+			.disableParallelToolUse(this.disableParallelToolUse)
 			.citationDocuments(this.getCitationDocuments())
 			.cacheOptions(this.getCacheOptions())
-			.skillContainer(this.getSkillContainer())
-			.httpHeaders(this.getHttpHeaders());
+			.outputConfig(this.outputConfig)
+			.httpHeaders(this.getHttpHeaders())
+			.skillContainer(this.getSkillContainer());
 	}
 
 	@Override
@@ -418,64 +528,139 @@ public boolean equals(Object o) {
 		if (!(o instanceof AnthropicChatOptions that)) {
 			return false;
 		}
-		return Objects.equals(this.model, that.model) && Objects.equals(this.maxTokens, that.maxTokens)
+		return Objects.equals(this.getModel(), that.getModel()) && Objects.equals(this.maxTokens, that.maxTokens)
 				&& Objects.equals(this.metadata, that.metadata)
 				&& Objects.equals(this.stopSequences, that.stopSequences)
 				&& Objects.equals(this.temperature, that.temperature) && Objects.equals(this.topP, that.topP)
 				&& Objects.equals(this.topK, that.topK) && Objects.equals(this.toolChoice, that.toolChoice)
 				&& Objects.equals(this.thinking, that.thinking)
+				&& Objects.equals(this.disableParallelToolUse, that.disableParallelToolUse)
 				&& Objects.equals(this.toolCallbacks, that.toolCallbacks)
 				&& Objects.equals(this.toolNames, that.toolNames)
 				&& Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled)
 				&& Objects.equals(this.toolContext, that.toolContext)
-				&& Objects.equals(this.httpHeaders, that.httpHeaders)
-				&& Objects.equals(this.cacheOptions, that.cacheOptions)
-				&& Objects.equals(this.outputFormat, that.outputFormat)
 				&& Objects.equals(this.citationDocuments, that.citationDocuments)
+				&& Objects.equals(this.cacheOptions, that.cacheOptions)
+				&& Objects.equals(this.outputConfig, that.outputConfig)
+				&& Objects.equals(this.httpHeaders, that.httpHeaders)
 				&& Objects.equals(this.skillContainer, that.skillContainer);
 	}
 
 	@Override
 	public int hashCode() {
-		return Objects.hash(this.model, this.maxTokens, this.metadata, this.stopSequences, this.temperature, this.topP,
-				this.topK, this.toolChoice, this.thinking, this.toolCallbacks, this.toolNames,
-				this.internalToolExecutionEnabled, this.toolContext, this.httpHeaders, this.cacheOptions,
-				this.outputFormat, this.citationDocuments, this.skillContainer);
+		return Objects.hash(this.getModel(), this.maxTokens, this.metadata, this.stopSequences, this.temperature,
+				this.topP, this.topK, this.toolChoice, this.thinking, this.disableParallelToolUse, this.toolCallbacks,
+				this.toolNames, this.internalToolExecutionEnabled, this.toolContext, this.citationDocuments,
+				this.cacheOptions, this.outputConfig, this.httpHeaders, this.skillContainer);
 	}
 
+	@Override
+	public String toString() {
+		return "AnthropicChatOptions{" + "model='" + this.getModel() + '\'' + ", maxTokens=" + this.maxTokens
+				+ ", metadata=" + this.metadata + ", stopSequences=" + this.stopSequences + ", temperature="
+				+ this.temperature + ", topP=" + this.topP + ", topK=" + this.topK + ", toolChoice=" + this.toolChoice
+				+ ", thinking=" + this.thinking + ", disableParallelToolUse=" + this.disableParallelToolUse
+				+ ", toolCallbacks=" + this.toolCallbacks + ", toolNames=" + this.toolNames
+				+ ", internalToolExecutionEnabled=" + this.internalToolExecutionEnabled + ", toolContext="
+				+ this.toolContext + ", citationDocuments=" + this.citationDocuments + ", cacheOptions="
+				+ this.cacheOptions + ", outputConfig=" + this.outputConfig + ", httpHeaders=" + this.httpHeaders
+				+ ", skillContainer=" + this.skillContainer + '}';
+	}
+
+	/**
+	 * Builder for creating {@link AnthropicChatOptions} instances.
+	 */
 	public static class Builder<B extends Builder<B>> extends DefaultToolCallingChatOptions.Builder<B>
 			implements StructuredOutputChatOptions.Builder<B> {
 
-		private ChatCompletionRequest.@Nullable Metadata metadata;
+		// AbstractAnthropicOptions fields
+		private @Nullable String baseUrl;
+
+		private @Nullable String apiKey;
+
+		private @Nullable Duration timeout;
+
+		private @Nullable Integer maxRetries;
+
+		private @Nullable Proxy proxy;
+
+		private Map<String, String> customHeaders = new HashMap<>();
+
+		// Anthropic-specific fields
+		private @Nullable Metadata metadata;
+
+		private @Nullable ToolChoice toolChoice;
 
-		private AnthropicApi.@Nullable ToolChoice toolChoice;
+		private @Nullable ThinkingConfigParam thinking;
 
-		private ChatCompletionRequest.@Nullable ThinkingConfig thinking;
+		private @Nullable Boolean disableParallelToolUse;
 
-		private List<CitationDocument> citationDocuments = new ArrayList<>();
+		private List<AnthropicCitationDocument> citationDocuments = new ArrayList<>();
 
-		private AnthropicCacheOptions cacheOptions = AnthropicCacheOptions.DISABLED;
+		private AnthropicCacheOptions cacheOptions = AnthropicCacheOptions.disabled();
 
-		private AnthropicApi.@Nullable SkillContainer skillContainer;
+		private @Nullable OutputConfig outputConfig;
 
 		private Map<String, String> httpHeaders = new HashMap<>();
 
-		private @Nullable OutputFormat outputFormat;
+		private @Nullable AnthropicSkillContainer skillContainer;
 
 		@Override
 		public B outputSchema(@Nullable String outputSchema) {
 			if (outputSchema != null) {
-				this.outputFormat = new OutputFormat(outputSchema);
+				Map<String, Object> schemaMap = JSON_MAPPER.readValue(outputSchema,
+						new TypeReference<Map<String, Object>>() {
+						});
+				JsonOutputFormat.Schema.Builder schemaBuilder = JsonOutputFormat.Schema.builder();
+				for (Map.Entry<String, Object> entry : schemaMap.entrySet()) {
+					schemaBuilder.putAdditionalProperty(entry.getKey(), JsonValue.from(entry.getValue()));
+				}
+				JsonOutputFormat jsonOutputFormat = JsonOutputFormat.builder().schema(schemaBuilder.build()).build();
+				OutputConfig.Builder configBuilder = OutputConfig.builder().format(jsonOutputFormat);
+				if (this.outputConfig != null) {
+					this.outputConfig.effort().ifPresent(configBuilder::effort);
+				}
+				this.outputConfig = configBuilder.build();
 			}
 			else {
-				this.outputFormat = null;
+				this.outputConfig = null;
 			}
 			return self();
 		}
 
-		public B model(AnthropicApi.@Nullable ChatModel model) {
+		public B baseUrl(@Nullable String baseUrl) {
+			this.baseUrl = baseUrl;
+			return self();
+		}
+
+		public B apiKey(@Nullable String apiKey) {
+			this.apiKey = apiKey;
+			return self();
+		}
+
+		public B timeout(@Nullable Duration timeout) {
+			this.timeout = timeout;
+			return self();
+		}
+
+		public B maxRetries(@Nullable Integer maxRetries) {
+			this.maxRetries = maxRetries;
+			return self();
+		}
+
+		public B proxy(@Nullable Proxy proxy) {
+			this.proxy = proxy;
+			return self();
+		}
+
+		public B customHeaders(Map<String, String> customHeaders) { // stores a defensive copy
+			this.customHeaders = (customHeaders != null) ? new HashMap<>(customHeaders) : new HashMap<>();
+			return self();
+		}
+
+		public B model(@Nullable Model model) {
 			if (model != null) {
-				this.model(model.getName());
+				this.model(model.asString());
 			}
 			else {
 				this.model((String) null);
@@ -483,178 +668,150 @@ public B model(AnthropicApi.@Nullable ChatModel model) {
 			return self();
 		}
 
-		public B metadata(ChatCompletionRequest.@Nullable Metadata metadata) {
+		public B metadata(@Nullable Metadata metadata) {
 			this.metadata = metadata;
 			return self();
 		}
 
-		public B toolChoice(AnthropicApi.@Nullable ToolChoice toolChoice) {
+		public B toolChoice(@Nullable ToolChoice toolChoice) {
 			this.toolChoice = toolChoice;
 			return self();
 		}
 
-		public B thinking(ChatCompletionRequest.@Nullable ThinkingConfig thinking) {
+		public B thinking(@Nullable ThinkingConfigParam thinking) {
 			this.thinking = thinking;
 			return self();
 		}
 
-		public B thinking(AnthropicApi.ThinkingType type, Integer budgetTokens) {
-			this.thinking = new ChatCompletionRequest.ThinkingConfig(type, budgetTokens);
+		/**
+		 * Convenience method to enable thinking with a specific budget in tokens.
+		 * @param budgetTokens thinking budget ({@code >= 1024} and {@code < maxTokens})
+		 */
+		public B thinkingEnabled(long budgetTokens) {
+			return thinking(
+					ThinkingConfigParam.ofEnabled(ThinkingConfigEnabled.builder().budgetTokens(budgetTokens).build()));
+		}
+
+		/**
+		 * Convenience method to let Claude adaptively decide whether to think.
+		 */
+		public B thinkingAdaptive() {
+			return thinking(ThinkingConfigParam.ofAdaptive(ThinkingConfigAdaptive.builder().build()));
+		}
+
+		/**
+		 * Convenience method to explicitly disable thinking.
+		 */
+		public B thinkingDisabled() {
+			return thinking(ThinkingConfigParam.ofDisabled(ThinkingConfigDisabled.builder().build()));
+		}
+
+		public B disableParallelToolUse(@Nullable Boolean disableParallelToolUse) {
+			this.disableParallelToolUse = disableParallelToolUse;
 			return self();
 		}
 
-		public B httpHeaders(Map<String, String> httpHeaders) {
-			this.httpHeaders = httpHeaders;
+		public B citationDocuments(List<AnthropicCitationDocument> citationDocuments) {
+			Assert.notNull(citationDocuments, "citationDocuments cannot be null");
+			this.citationDocuments = new ArrayList<>(citationDocuments);
 			return self();
 		}
 
-		public B cacheOptions(AnthropicCacheOptions cacheOptions) {
-			this.cacheOptions = cacheOptions;
+		public B citationDocuments(AnthropicCitationDocument... citationDocuments) {
+			Assert.notNull(citationDocuments, "citationDocuments cannot be null");
+			this.citationDocuments.addAll(java.util.Arrays.asList(citationDocuments));
 			return self();
 		}
 
-		/**
-		 * Set citation documents for the request.
-		 * @param citationDocuments List of documents to include for citations
-		 * @return Builder for method chaining
-		 */
-		public B citationDocuments(List<CitationDocument> citationDocuments) {
-			Assert.notNull(citationDocuments, "Citation documents cannot be null");
-			this.citationDocuments = citationDocuments;
+		public B addCitationDocument(AnthropicCitationDocument citationDocument) {
+			Assert.notNull(citationDocument, "citationDocument cannot be null");
+			this.citationDocuments.add(citationDocument);
+			return self();
+		}
+
+		public B cacheOptions(AnthropicCacheOptions cacheOptions) {
+			Assert.notNull(cacheOptions, "cacheOptions cannot be null");
+			this.cacheOptions = cacheOptions;
 			return self();
 		}
 
 		/**
-		 * Set citation documents from variable arguments.
-		 * @param documents Variable number of CitationDocument objects
-		 * @return Builder for method chaining
+		 * Sets the output configuration for controlling response format and effort.
+		 * @param outputConfig the output configuration
+		 * @return this builder
 		 */
-		public B citationDocuments(CitationDocument... documents) {
-			Assert.notNull(documents, "Citation documents cannot be null");
-			this.citationDocuments.addAll(Arrays.asList(documents));
+		public B outputConfig(@Nullable OutputConfig outputConfig) {
+			this.outputConfig = outputConfig;
 			return self();
 		}
 
 		/**
-		 * Add a single citation document.
-		 * @param document Citation document to add
-		 * @return Builder for method chaining
+		 * Convenience method to set the effort level for the model's response.
+		 * @param effort the desired effort level (LOW, MEDIUM, HIGH, MAX)
+		 * @return this builder
 		 */
-		public B addCitationDocument(CitationDocument document) {
-			Assert.notNull(document, "Citation document cannot be null");
-			this.citationDocuments.add(document);
+		public B effort(OutputConfig.Effort effort) {
+			OutputConfig.Builder configBuilder = OutputConfig.builder().effort(effort);
+			if (this.outputConfig != null) {
+				this.outputConfig.format().ifPresent(configBuilder::format);
+			}
+			this.outputConfig = configBuilder.build();
 			return self();
 		}
 
-		public B outputFormat(@Nullable OutputFormat outputFormat) {
-			this.outputFormat = outputFormat;
+		public B httpHeaders(Map<String, String> httpHeaders) { // copies; null means no headers
+			this.httpHeaders = (httpHeaders != null) ? new HashMap<>(httpHeaders) : new HashMap<>();
 			return self();
 		}
 
-		/**
-		 * Set the Skills container for this request.
-		 * @param skillContainer Container with skills to make available
-		 * @return Builder for method chaining
-		 */
-		public B skillContainer(AnthropicApi.@Nullable SkillContainer skillContainer) {
+		public B skillContainer(@Nullable AnthropicSkillContainer skillContainer) {
 			this.skillContainer = skillContainer;
 			return self();
 		}
 
-		/**
-		 * Add a skill by its ID or name. Automatically detects whether it's a pre-built
-		 * Anthropic skill (xlsx, pptx, docx, pdf) or a custom skill ID.
-		 *
-		 * <p>
-		 * Example: <pre>{@code
-		 * AnthropicChatOptions options = AnthropicChatOptions.builder()
-		 *     .model("claude-sonnet-4-5")
-		 *     .skill("xlsx")                          // Pre-built skill
-		 *     .skill("skill_01abc123...")             // Custom skill
-		 *     .build();
-		 * }</pre>
-		 * @param skillIdOrName The skill ID or name
-		 * @return Builder for method chaining
-		 */
 		public B skill(String skillIdOrName) {
 			Assert.hasText(skillIdOrName, "Skill ID or name cannot be empty");
-			AnthropicApi.AnthropicSkill prebuilt = AnthropicApi.AnthropicSkill.fromId(skillIdOrName);
+			AnthropicSkill prebuilt = AnthropicSkill.fromId(skillIdOrName);
 			if (prebuilt != null) {
 				return this.skill(prebuilt.toSkill());
 			}
-			return this.skill(new AnthropicApi.Skill(AnthropicApi.SkillType.CUSTOM, skillIdOrName));
+			return this.skill(new AnthropicSkillRecord(AnthropicSkillType.CUSTOM, skillIdOrName));
 		}
 
-		/**
-		 * Add a skill by its ID or name with a specific version.
-		 * @param skillIdOrName The skill ID or name
-		 * @param version The version (e.g., "latest", "20251013")
-		 * @return Builder for method chaining
-		 */
 		public B skill(String skillIdOrName, String version) {
 			Assert.hasText(skillIdOrName, "Skill ID or name cannot be empty");
 			Assert.hasText(version, "Version cannot be empty");
-			AnthropicApi.AnthropicSkill prebuilt = AnthropicApi.AnthropicSkill.fromId(skillIdOrName);
+			AnthropicSkill prebuilt = AnthropicSkill.fromId(skillIdOrName);
 			if (prebuilt != null) {
 				return this.skill(prebuilt.toSkill(version));
 			}
-			return this.skill(new AnthropicApi.Skill(AnthropicApi.SkillType.CUSTOM, skillIdOrName, version));
+			return this.skill(new AnthropicSkillRecord(AnthropicSkillType.CUSTOM, skillIdOrName, version));
 		}
 
-		/**
-		 * Add a pre-built Anthropic skill using the enum.
-		 *
-		 * <p>
-		 * Example: <pre>{@code
-		 * AnthropicChatOptions options = AnthropicChatOptions.builder()
-		 *     .model("claude-sonnet-4-5")
-		 *     .skill(AnthropicSkill.XLSX)
-		 *     .skill(AnthropicSkill.PPTX)
-		 *     .build();
-		 * }</pre>
-		 * @param anthropicSkill Pre-built Anthropic skill to add
-		 * @return Builder for method chaining
-		 */
-		public B skill(AnthropicApi.AnthropicSkill anthropicSkill) {
+		public B skill(AnthropicSkill anthropicSkill) {
 			Assert.notNull(anthropicSkill, "AnthropicSkill cannot be null");
 			return this.skill(anthropicSkill.toSkill());
 		}
 
-		/**
-		 * Add a pre-built Anthropic skill with specific version.
-		 * @param anthropicSkill Pre-built Anthropic skill to add
-		 * @param version Version of the skill (e.g., "latest", "20251013")
-		 * @return Builder for method chaining
-		 */
-		public B skill(AnthropicApi.AnthropicSkill anthropicSkill, String version) {
+		public B skill(AnthropicSkill anthropicSkill, String version) {
 			Assert.notNull(anthropicSkill, "AnthropicSkill cannot be null");
 			Assert.hasText(version, "Version cannot be empty");
 			return this.skill(anthropicSkill.toSkill(version));
 		}
 
-		/**
-		 * Add a Skill record directly.
-		 * @param skill Skill to add
-		 * @return Builder for method chaining
-		 */
-		public B skill(AnthropicApi.Skill skill) {
+		public B skill(AnthropicSkillRecord skill) {
 			Assert.notNull(skill, "Skill cannot be null");
 			if (this.skillContainer == null) {
-				this.skillContainer = AnthropicApi.SkillContainer.builder().skill(skill).build();
+				this.skillContainer = AnthropicSkillContainer.builder().skill(skill).build();
 			}
 			else {
-				List<AnthropicApi.Skill> existingSkills = new ArrayList<>(this.skillContainer.skills());
+				List<AnthropicSkillRecord> existingSkills = new ArrayList<>(this.skillContainer.getSkills());
 				existingSkills.add(skill);
-				this.skillContainer = new AnthropicApi.SkillContainer(existingSkills);
+				this.skillContainer = new AnthropicSkillContainer(existingSkills);
 			}
 			return self();
 		}
 
-		/**
-		 * Add multiple skills by their IDs or names.
-		 * @param skillIds The skill IDs or names
-		 * @return Builder for method chaining
-		 */
 		public B skills(String... skillIds) {
 			Assert.notEmpty(skillIds, "Skill IDs cannot be empty");
 			for (String skillId : skillIds) {
@@ -663,67 +820,34 @@ public B skills(String... skillIds) {
 			return self();
 		}
 
-		/**
-		 * Add multiple skills from a list of IDs or names.
-		 * @param skillIds The list of skill IDs or names
-		 * @return Builder for method chaining
-		 */
 		public B skills(List<String> skillIds) {
 			Assert.notEmpty(skillIds, "Skill IDs cannot be empty");
 			skillIds.forEach(this::skill);
 			return self();
 		}
 
-		/**
-		 * Add an Anthropic pre-built skill (xlsx, pptx, docx, pdf).
-		 * @param anthropicSkill Pre-built Anthropic skill to add
-		 * @return Builder for method chaining
-		 * @deprecated Use {@link #skill(AnthropicApi.AnthropicSkill)} instead
-		 */
-		@Deprecated
-		public B anthropicSkill(AnthropicApi.AnthropicSkill anthropicSkill) {
-			return this.skill(anthropicSkill);
-		}
-
-		/**
-		 * Add an Anthropic pre-built skill with specific version.
-		 * @param anthropicSkill Pre-built Anthropic skill to add
-		 * @param version Version of the skill (e.g., "latest", "20251013")
-		 * @return Builder for method chaining
-		 * @deprecated Use {@link #skill(AnthropicApi.AnthropicSkill, String)} instead
-		 */
-		@Deprecated
-		public B anthropicSkill(AnthropicApi.AnthropicSkill anthropicSkill, String version) {
-			return this.skill(anthropicSkill, version);
-		}
-
-		/**
-		 * Add a custom skill by ID.
-		 * @param skillId Custom skill ID
-		 * @return Builder for method chaining
-		 * @deprecated Use {@link #skill(String)} instead
-		 */
-		@Deprecated
-		public B customSkill(String skillId) {
-			return this.skill(skillId);
-		}
-
-		/**
-		 * Add a custom skill with specific version.
-		 * @param skillId Custom skill ID
-		 * @param version Version of the skill
-		 * @return Builder for method chaining
-		 * @deprecated Use {@link #skill(String, String)} instead
-		 */
-		@Deprecated
-		public B customSkill(String skillId, String version) {
-			return this.skill(skillId, version);
-		}
-
 		@Override
 		public B combineWith(ChatOptions.Builder<?> other) {
 			super.combineWith(other);
 			if (other instanceof Builder<?> options) {
+				if (options.baseUrl != null) {
+					this.baseUrl = options.baseUrl;
+				}
+				if (options.apiKey != null) {
+					this.apiKey = options.apiKey;
+				}
+				if (options.timeout != null) {
+					this.timeout = options.timeout;
+				}
+				if (options.maxRetries != null) {
+					this.maxRetries = options.maxRetries;
+				}
+				if (options.proxy != null) {
+					this.proxy = options.proxy;
+				}
+				if (!options.customHeaders.isEmpty()) {
+					this.customHeaders = options.customHeaders;
+				}
 				if (options.metadata != null) {
 					this.metadata = options.metadata;
 				}
@@ -733,20 +857,23 @@ public B combineWith(ChatOptions.Builder<?> other) {
 				if (options.thinking != null) {
 					this.thinking = options.thinking;
 				}
+				if (options.disableParallelToolUse != null) {
+					this.disableParallelToolUse = options.disableParallelToolUse;
+				}
 				if (!options.citationDocuments.isEmpty()) {
 					this.citationDocuments = options.citationDocuments;
 				}
-				if (options.cacheOptions != AnthropicCacheOptions.DISABLED) {
+				if (options.cacheOptions != null && options.cacheOptions.getStrategy() != AnthropicCacheStrategy.NONE) {
 					this.cacheOptions = options.cacheOptions;
 				}
-				if (options.skillContainer != null) {
-					this.skillContainer = options.skillContainer;
+				if (options.outputConfig != null) {
+					this.outputConfig = options.outputConfig;
 				}
 				if (!options.httpHeaders.isEmpty()) {
 					this.httpHeaders = options.httpHeaders;
 				}
-				if (options.outputFormat != null) {
-					this.outputFormat = options.outputFormat;
+				if (options.skillContainer != null) {
+					this.skillContainer = options.skillContainer;
 				}
 			}
 			return self();
@@ -754,28 +881,36 @@ public B combineWith(ChatOptions.Builder<?> other) {
 
 		@SuppressWarnings("NullAway")
 		public AnthropicChatOptions build() {
-			// TODO: add assertions, remove SuppressWarnings
-			// Assert.state(this.model != null, "model must be set");
-			// Assert.state(this.maxTokens != null, "maxTokens must be set");
 			AnthropicChatOptions options = new AnthropicChatOptions();
-			options.model = this.model;
+			// AbstractAnthropicOptions fields
+			options.setModel(this.model);
+			options.setBaseUrl(this.baseUrl);
+			options.setApiKey(this.apiKey);
+			options.setTimeout(this.timeout);
+			options.setMaxRetries(this.maxRetries);
+			options.setProxy(this.proxy);
+			options.setCustomHeaders(this.customHeaders);
+			// ChatOptions fields
 			options.maxTokens = this.maxTokens;
-			options.metadata = this.metadata;
 			options.stopSequences = this.stopSequences;
 			options.temperature = this.temperature;
 			options.topP = this.topP;
 			options.topK = this.topK;
-			options.toolChoice = this.toolChoice;
-			options.thinking = this.thinking;
-			options.citationDocuments = this.citationDocuments;
-			options.cacheOptions = this.cacheOptions;
-			options.skillContainer = this.skillContainer;
+			// ToolCallingChatOptions fields
 			options.toolCallbacks = this.toolCallbacks;
 			options.toolNames = this.toolNames;
 			options.internalToolExecutionEnabled = this.internalToolExecutionEnabled;
 			options.toolContext = this.toolContext;
+			// Anthropic-specific fields
+			options.metadata = this.metadata;
+			options.toolChoice = this.toolChoice;
+			options.thinking = this.thinking;
+			options.disableParallelToolUse = this.disableParallelToolUse;
+			options.citationDocuments = this.citationDocuments;
+			options.cacheOptions = this.cacheOptions;
+			options.outputConfig = this.outputConfig;
 			options.httpHeaders = this.httpHeaders;
-			options.outputFormat = this.outputFormat;
+			options.skillContainer = this.skillContainer;
 			options.validateCitationConsistency();
 			return options;
 		}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/CitationDocument.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCitationDocument.java
similarity index 50%
rename from models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/CitationDocument.java
rename to models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCitationDocument.java
index e2e09a5b851..d19252a29fe 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/CitationDocument.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicCitationDocument.java
@@ -14,27 +14,28 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic.api;
+package org.springframework.ai.anthropic;
 
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Base64;
 import java.util.List;
-import java.util.stream.Collectors;
 
+import com.anthropic.models.messages.Base64PdfSource;
+import com.anthropic.models.messages.CitationsConfigParam;
+import com.anthropic.models.messages.ContentBlockSource;
+import com.anthropic.models.messages.ContentBlockSourceContent;
+import com.anthropic.models.messages.DocumentBlockParam;
+import com.anthropic.models.messages.TextBlockParam;
 import org.jspecify.annotations.Nullable;
 
-import org.springframework.ai.anthropic.AnthropicChatOptions;
-import org.springframework.ai.anthropic.Citation;
 import org.springframework.util.Assert;
 
 /**
- * Builder class for creating citation-enabled documents. Provides a fluent API for
- * constructing documents of different types that can be converted to ContentBlocks for
- * the Anthropic API.
+ * Builder class for creating citation-enabled documents using the Anthropic Java SDK.
+ * Produces SDK {@link DocumentBlockParam} objects directly.
  *
  * <p>
  * Citations allow Claude to reference specific parts of provided documents in its
@@ -48,48 +49,37 @@
  * <b>Plain Text Document:</b>
  *
  * <pre>{@code
- * CitationDocument document = CitationDocument.builder()
+ * AnthropicCitationDocument document = AnthropicCitationDocument.builder()
  *     .plainText("The Eiffel Tower was completed in 1889 in Paris, France.")
  *     .title("Eiffel Tower Facts")
+ *     .citationsEnabled(true)
  *     .build();
- *
- * AnthropicChatOptions options = AnthropicChatOptions.builder()
- *     .model(AnthropicApi.ChatModel.CLAUDE_3_7_SONNET.getName())
- *     .citationDocuments(document)
- *     .build();
- *
- * ChatResponse response = chatModel.call(new Prompt("When was the Eiffel Tower built?", options));
- *
- * // Citations are available in response metadata
- * List<Citation> citations = (List<Citation>) response.getMetadata().get("citations");
  * }</pre>
  *
  * <p>
  * <b>PDF Document:</b>
  *
  * <pre>{@code
- * CitationDocument document = CitationDocument.builder()
+ * AnthropicCitationDocument document = AnthropicCitationDocument.builder()
  *     .pdfFile("path/to/document.pdf")
  *     .title("Technical Specification")
+ *     .citationsEnabled(true)
  *     .build();
- *
- * // PDF citations include page numbers
  * }</pre>
  *
  * <p>
  * <b>Custom Content Blocks:</b>
  *
  * <pre>{@code
- * CitationDocument document = CitationDocument.builder()
+ * AnthropicCitationDocument document = AnthropicCitationDocument.builder()
  *     .customContent(
  *         "Fact 1: The Great Wall spans 21,196 kilometers.",
  *         "Fact 2: Construction began in the 7th century BC.",
  *         "Fact 3: It was built to protect Chinese states."
  *     )
  *     .title("Great Wall Facts")
+ *     .citationsEnabled(true)
  *     .build();
- *
- * // Custom content citations reference specific content blocks
  * }</pre>
  *
  * @author Soby Chacko
@@ -97,30 +87,20 @@
  * @see Citation
  * @see AnthropicChatOptions#getCitationDocuments()
  */
-public final class CitationDocument {
+public final class AnthropicCitationDocument {
 
 	/**
-	 * Document types supported by Anthropic Citations API. Each type uses different
-	 * citation location formats in the response.
+	 * Document types supported by Anthropic Citations API.
 	 */
 	public enum DocumentType {
 
-		/**
-		 * Plain text document with character-based citations. Text is automatically
-		 * chunked by sentences and citations return character start/end indices.
-		 */
+		/** Plain text document with character-based citations. */
 		PLAIN_TEXT,
 
-		/**
-		 * PDF document with page-based citations. Content is extracted and chunked from
-		 * the PDF, and citations return page start/end numbers.
-		 */
+		/** PDF document with page-based citations. */
 		PDF,
 
-		/**
-		 * Custom content with user-defined blocks and block-based citations. Content is
-		 * provided as explicit blocks, and citations return block start/end indices.
-		 */
+		/** Custom content with user-defined blocks and block-based citations. */
 		CUSTOM_CONTENT
 
 	}
@@ -133,12 +113,11 @@ public enum DocumentType {
 	private @Nullable String context;
 
 	@SuppressWarnings("NullAway.Init")
-	private Object sourceData; // String for text, byte[] for PDF, List<ContentBlock> for
-								// custom
+	private Object sourceData;
 
 	private boolean citationsEnabled = false;
 
-	private CitationDocument() {
+	private AnthropicCitationDocument() {
 	}
 
 	public static Builder builder() {
@@ -146,28 +125,40 @@ public static Builder builder() {
 	}
 
 	/**
-	 * Convert this CitationDocument to a ContentBlock for API usage.
-	 * @return ContentBlock configured for citations
+	 * Convert this citation document to an SDK {@link DocumentBlockParam}.
+	 * @return configured DocumentBlockParam for the Anthropic API
 	 */
-	public AnthropicApi.ContentBlock toContentBlock() {
-		AnthropicApi.ContentBlock.Source source = createSource();
-		return new AnthropicApi.ContentBlock(source, this.title, this.context, this.citationsEnabled, null);
-	}
+	public DocumentBlockParam toDocumentBlockParam() {
+		CitationsConfigParam citationsConfig = CitationsConfigParam.builder().enabled(this.citationsEnabled).build();
+
+		DocumentBlockParam.Builder builder = DocumentBlockParam.builder();
 
-	private AnthropicApi.ContentBlock.Source createSource() {
-		return switch (this.type) {
-			case PLAIN_TEXT ->
-				new AnthropicApi.ContentBlock.Source("text", "text/plain", (String) this.sourceData, null, null);
+		switch (this.type) {
+			case PLAIN_TEXT -> builder.textSource((String) this.sourceData);
 			case PDF -> {
 				String base64Data = Base64.getEncoder().encodeToString((byte[]) this.sourceData);
-				yield new AnthropicApi.ContentBlock.Source("base64", "application/pdf", base64Data, null, null);
+				builder.source(DocumentBlockParam.Source.ofBase64(Base64PdfSource.builder().data(base64Data).build()));
 			}
 			case CUSTOM_CONTENT -> {
 				@SuppressWarnings("unchecked")
-				List<AnthropicApi.ContentBlock> content = (List<AnthropicApi.ContentBlock>) this.sourceData;
-				yield new AnthropicApi.ContentBlock.Source("content", null, null, null, content);
+				List<String> textBlocks = (List<String>) this.sourceData;
+				List<ContentBlockSourceContent> contentItems = textBlocks.stream()
+					.map(text -> ContentBlockSourceContent.ofText(TextBlockParam.builder().text(text).build()))
+					.toList();
+				builder.source(DocumentBlockParam.Source
+					.ofContent(ContentBlockSource.builder().contentOfBlockSource(contentItems).build()));
 			}
-		};
+		}
+
+		builder.citations(citationsConfig);
+		if (this.title != null) {
+			builder.title(this.title);
+		}
+		if (this.context != null) {
+			builder.context(this.context);
+		}
+
+		return builder.build();
 	}
 
 	public boolean isCitationsEnabled() {
@@ -175,16 +166,16 @@ public boolean isCitationsEnabled() {
 	}
 
 	/**
-	 * Builder class for CitationDocument.
+	 * Builder class for AnthropicCitationDocument.
 	 */
 	public static class Builder {
 
-		private final CitationDocument document = new CitationDocument();
+		private final AnthropicCitationDocument document = new AnthropicCitationDocument();
 
 		/**
 		 * Create a plain text document.
-		 * @param text The document text content
-		 * @return Builder for method chaining
+		 * @param text the document text content
+		 * @return builder for method chaining
 		 */
 		public Builder plainText(String text) {
 			Assert.hasText(text, "Text content cannot be null or empty");
@@ -195,8 +186,8 @@ public Builder plainText(String text) {
 
 		/**
 		 * Create a PDF document from byte array.
-		 * @param pdfBytes The PDF file content as bytes
-		 * @return Builder for method chaining
+		 * @param pdfBytes the PDF file content as bytes
+		 * @return builder for method chaining
 		 */
 		public Builder pdf(byte[] pdfBytes) {
 			Assert.notNull(pdfBytes, "PDF bytes cannot be null");
@@ -208,8 +199,8 @@ public Builder pdf(byte[] pdfBytes) {
 
 		/**
 		 * Create a PDF document from file path.
-		 * @param filePath Path to the PDF file
-		 * @return Builder for method chaining
+		 * @param filePath path to the PDF file
+		 * @return builder for method chaining
 		 * @throws IOException if file cannot be read
 		 */
 		public Builder pdfFile(String filePath) throws IOException {
@@ -218,37 +209,23 @@ public Builder pdfFile(String filePath) throws IOException {
 			return pdf(pdfBytes);
 		}
 
-		/**
-		 * Create a custom content document with user-defined blocks.
-		 * @param contentBlocks List of content blocks for fine-grained citation control
-		 * @return Builder for method chaining
-		 */
-		public Builder customContent(List<AnthropicApi.ContentBlock> contentBlocks) {
-			Assert.notNull(contentBlocks, "Content blocks cannot be null");
-			Assert.notEmpty(contentBlocks, "Content blocks cannot be empty");
-			this.document.type = DocumentType.CUSTOM_CONTENT;
-			this.document.sourceData = new ArrayList<>(contentBlocks);
-			return this;
-		}
-
 		/**
 		 * Create a custom content document from text blocks.
-		 * @param textBlocks Variable number of text strings to create content blocks
-		 * @return Builder for method chaining
+		 * @param textBlocks text strings, one per content block; the array is copied
+		 * @return builder for method chaining
 		 */
 		public Builder customContent(String... textBlocks) {
 			Assert.notNull(textBlocks, "Text blocks cannot be null");
 			Assert.notEmpty(textBlocks, "Text blocks cannot be empty");
-			List<AnthropicApi.ContentBlock> blocks = Arrays.stream(textBlocks)
-				.map(AnthropicApi.ContentBlock::new)
-				.collect(Collectors.toList());
-			return customContent(blocks);
+			this.document.type = DocumentType.CUSTOM_CONTENT;
+			this.document.sourceData = Arrays.asList(textBlocks.clone());
+			return this;
 		}
 
 		/**
-		 * Set the document title (optional, not included in citations).
-		 * @param title Document title for reference
-		 * @return Builder for method chaining
+		 * Set the document title.
+		 * @param title document title for reference
+		 * @return builder for method chaining
 		 */
 		public Builder title(String title) {
 			this.document.title = title;
@@ -256,9 +233,9 @@ public Builder title(String title) {
 		}
 
 		/**
-		 * Set the document context (optional, not included in citations).
-		 * @param context Additional context or metadata about the document
-		 * @return Builder for method chaining
+		 * Set the document context.
+		 * @param context additional context about the document
+		 * @return builder for method chaining
 		 */
 		public Builder context(String context) {
 			this.document.context = context;
@@ -267,8 +244,8 @@ public Builder context(String context) {
 
 		/**
 		 * Enable or disable citations for this document.
-		 * @param enabled Whether citations should be enabled
-		 * @return Builder for method chaining
+		 * @param enabled whether citations should be enabled
+		 * @return builder for method chaining
 		 */
 		public Builder citationsEnabled(boolean enabled) {
 			this.document.citationsEnabled = enabled;
@@ -276,10 +253,10 @@ public Builder citationsEnabled(boolean enabled) {
 		}
 
 		/**
-		 * Build the CitationDocument.
-		 * @return Configured CitationDocument
+		 * Build the AnthropicCitationDocument.
+		 * @return configured citation document
 		 */
-		public CitationDocument build() {
+		public AnthropicCitationDocument build() {
 			Assert.notNull(this.document.type, "Document type must be specified");
 			Assert.notNull(this.document.sourceData, "Document source data must be specified");
 			return this.document;
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSetup.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSetup.java
new file mode 100644
index 00000000000..9cebac0b984
--- /dev/null
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSetup.java
@@ -0,0 +1,237 @@
+/*
+ * Copyright 2023-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic;
+
+import java.net.Proxy;
+import java.time.Duration;
+import java.util.Collections;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import com.anthropic.client.AnthropicClient;
+import com.anthropic.client.AnthropicClientAsync;
+import com.anthropic.client.okhttp.AnthropicOkHttpClient;
+import com.anthropic.client.okhttp.AnthropicOkHttpClientAsync;
+import org.jspecify.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Factory class for creating and configuring Anthropic SDK client instances.
+ *
+ * <p>
+ * This utility class provides static factory methods for creating both synchronous
+ * ({@link AnthropicClient}) and asynchronous ({@link AnthropicClientAsync}) clients with
+ * comprehensive configuration support. It handles API key detection from environment
+ * variables and provides sensible defaults for timeouts and retry behavior.
+ *
+ * <p>
+ * <b>Client Types:</b>
+ * <ul>
+ * <li><b>Synchronous Client:</b> Used for blocking API calls via
+ * {@link #setupSyncClient}</li>
+ * <li><b>Asynchronous Client:</b> Used for streaming responses via
+ * {@link #setupAsyncClient}</li>
+ * </ul>
+ *
+ * <p>
+ * <b>Environment Variable Support:</b>
+ * <ul>
+ * <li>{@code ANTHROPIC_API_KEY} - Primary API key for authentication</li>
+ * <li>{@code ANTHROPIC_AUTH_TOKEN} - Alternative authentication token</li>
+ * <li>{@code ANTHROPIC_BASE_URL} - Override the default API endpoint</li>
+ * </ul>
+ *
+ * <p>
+ * <b>Default Configuration:</b>
+ * <ul>
+ * <li>Timeout: 60 seconds</li>
+ * <li>Max Retries: 2</li>
+ * <li>User-Agent: {@code spring-ai-anthropic-sdk}</li>
+ * </ul>
+ *
+ * <p>
+ * This class is not intended to be instantiated directly. Use the static factory methods
+ * to create client instances.
+ *
+ * @author Soby Chacko
+ * @since 2.0.0
+ * @see org.springframework.ai.anthropic.AnthropicChatModel
+ */
+public final class AnthropicSetup {
+
+	static final String ANTHROPIC_URL = "https://api.anthropic.com";
+
+	static final String ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY";
+
+	static final String ANTHROPIC_AUTH_TOKEN = "ANTHROPIC_AUTH_TOKEN";
+
+	static final String ANTHROPIC_BASE_URL = "ANTHROPIC_BASE_URL";
+
+	static final String DEFAULT_USER_AGENT = "spring-ai-anthropic-sdk";
+
+	private static final Logger logger = LoggerFactory.getLogger(AnthropicSetup.class);
+
+	private static final Duration DEFAULT_TIMEOUT = Duration.ofSeconds(60);
+
+	private static final int DEFAULT_MAX_RETRIES = 2;
+
+	private AnthropicSetup() {
+	}
+
+	/**
+	 * Creates a synchronous Anthropic client with the specified configuration.
+	 *
+	 * <p>
+	 * Credentials resolve in order: {@code apiKey}, then {@code ANTHROPIC_API_KEY}, then
+	 * {@code ANTHROPIC_AUTH_TOKEN}, which is passed to the SDK as an auth token.
+	 * @param baseUrl the base URL for the API (null to use default or environment
+	 * variable)
+	 * @param apiKey the API key (null to detect from environment)
+	 * @param timeout the request timeout (null to use default of 60 seconds)
+	 * @param maxRetries the maximum number of retries (null to use default of 2)
+	 * @param proxy the proxy to use (null for no proxy)
+	 * @param customHeaders additional HTTP headers to include in requests
+	 * @return a configured Anthropic client
+	 */
+	public static AnthropicClient setupSyncClient(@Nullable String baseUrl, @Nullable String apiKey,
+			@Nullable Duration timeout, @Nullable Integer maxRetries, @Nullable Proxy proxy,
+			@Nullable Map<String, String> customHeaders) {
+
+		AnthropicOkHttpClient.Builder builder = AnthropicOkHttpClient.builder();
+
+		String resolvedBaseUrl = detectBaseUrlFromEnv(baseUrl);
+		if (resolvedBaseUrl != null) {
+			builder.baseUrl(resolvedBaseUrl);
+		}
+
+		// ANTHROPIC_AUTH_TOKEN is a fallback only, and it is registered via authToken(..)
+		// rather than apiKey(..) so the SDK treats it as a token, not as an API key.
+		String resolvedApiKey = (apiKey != null) ? apiKey : readEnv(ANTHROPIC_API_KEY, "API key");
+		String authToken = (resolvedApiKey != null) ? null : readEnv(ANTHROPIC_AUTH_TOKEN, "auth token");
+		if (resolvedApiKey != null) {
+			builder.apiKey(resolvedApiKey);
+		}
+		else if (authToken != null) {
+			builder.authToken(authToken);
+		}
+
+		if (proxy != null) {
+			builder.proxy(proxy);
+		}
+
+		builder.putHeader("User-Agent", DEFAULT_USER_AGENT);
+		if (customHeaders != null) {
+			builder.putAllHeaders(customHeaders.entrySet()
+				.stream()
+				.collect(Collectors.toMap(Map.Entry::getKey, entry -> Collections.singletonList(entry.getValue()))));
+		}
+
+		builder.timeout((timeout != null) ? timeout : DEFAULT_TIMEOUT);
+		builder.maxRetries((maxRetries != null) ? maxRetries : DEFAULT_MAX_RETRIES);
+
+		return builder.build();
+	}
+
+	/**
+	 * Creates an asynchronous Anthropic client with the specified configuration. The
+	 * async client is used for streaming responses.
+	 *
+	 * <p>
+	 * Credentials resolve in order: {@code apiKey}, then {@code ANTHROPIC_API_KEY}, then
+	 * {@code ANTHROPIC_AUTH_TOKEN}, which is passed to the SDK as an auth token.
+	 * @param baseUrl the base URL for the API (null to use default or environment
+	 * variable)
+	 * @param apiKey the API key (null to detect from environment)
+	 * @param timeout the request timeout (null to use default of 60 seconds)
+	 * @param maxRetries the maximum number of retries (null to use default of 2)
+	 * @param proxy the proxy to use (null for no proxy)
+	 * @param customHeaders additional HTTP headers to include in requests
+	 * @return a configured async Anthropic client
+	 */
+	public static AnthropicClientAsync setupAsyncClient(@Nullable String baseUrl, @Nullable String apiKey,
+			@Nullable Duration timeout, @Nullable Integer maxRetries, @Nullable Proxy proxy,
+			@Nullable Map<String, String> customHeaders) {
+
+		AnthropicOkHttpClientAsync.Builder builder = AnthropicOkHttpClientAsync.builder();
+
+		String resolvedBaseUrl = detectBaseUrlFromEnv(baseUrl);
+		if (resolvedBaseUrl != null) {
+			builder.baseUrl(resolvedBaseUrl);
+		}
+
+		// ANTHROPIC_AUTH_TOKEN is a fallback only, and it is registered via authToken(..)
+		// rather than apiKey(..) so the SDK treats it as a token, not as an API key.
+		String resolvedApiKey = (apiKey != null) ? apiKey : readEnv(ANTHROPIC_API_KEY, "API key");
+		String authToken = (resolvedApiKey != null) ? null : readEnv(ANTHROPIC_AUTH_TOKEN, "auth token");
+		if (resolvedApiKey != null) {
+			builder.apiKey(resolvedApiKey);
+		}
+		else if (authToken != null) {
+			builder.authToken(authToken);
+		}
+
+		if (proxy != null) {
+			builder.proxy(proxy);
+		}
+
+		builder.putHeader("User-Agent", DEFAULT_USER_AGENT);
+		if (customHeaders != null) {
+			builder.putAllHeaders(customHeaders.entrySet()
+				.stream()
+				.collect(Collectors.toMap(Map.Entry::getKey, entry -> Collections.singletonList(entry.getValue()))));
+		}
+
+		builder.timeout((timeout != null) ? timeout : DEFAULT_TIMEOUT);
+		builder.maxRetries((maxRetries != null) ? maxRetries : DEFAULT_MAX_RETRIES);
+
+		return builder.build();
+	}
+
+	/**
+	 * Detects the base URL from environment variable if not explicitly provided.
+	 * @param baseUrl the explicitly provided base URL (may be null)
+	 * @return the base URL to use, or null if neither source provides one
+	 */
+	static @Nullable String detectBaseUrlFromEnv(@Nullable String baseUrl) {
+		return (baseUrl != null) ? baseUrl : readEnv(ANTHROPIC_BASE_URL, "Base URL");
+	}
+
+	/**
+	 * Detects a credential from environment variables, preferring the API key.
+	 * @return the API key or, failing that, the auth token; null if neither is set
+	 */
+	static @Nullable String detectApiKey() {
+		String apiKey = readEnv(ANTHROPIC_API_KEY, "API key");
+		return (apiKey != null) ? apiKey : readEnv(ANTHROPIC_AUTH_TOKEN, "auth token");
+	}
+
+	/**
+	 * Reads an environment variable and logs at debug level when it is set.
+	 * @param variable the environment variable name
+	 * @param description label for the setting, used only in the log message
+	 * @return the variable's value, or null if it is not set
+	 */
+	private static @Nullable String readEnv(String variable, String description) {
+		String value = System.getenv(variable);
+		if (value != null) {
+			logger.debug("Anthropic {} detected from environment variable {}.", description, variable);
+		}
+		return value;
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkill.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkill.java
new file mode 100644
index 00000000000..6148204b29a
--- /dev/null
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkill.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2023-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.jspecify.annotations.Nullable;
+
+/**
+ * Enum representing the pre-built Anthropic Skills available for Claude.
+ *
+ * @author Soby Chacko
+ */
+public enum AnthropicSkill {
+
+	/** Excel spreadsheet generation and manipulation. */
+	XLSX("xlsx", "Excel spreadsheet generation"),
+
+	/** PowerPoint presentation creation. */
+	PPTX("pptx", "PowerPoint presentation creation"),
+
+	/** Word document generation. */
+	DOCX("docx", "Word document generation"),
+
+	/** PDF document creation. */
+	PDF("pdf", "PDF document creation");
+
+	/** Lookup table from lower-cased skill ID to constant, used by {@link #fromId}. */
+	private static final Map<String, AnthropicSkill> BY_ID;
+
+	static {
+		Map<String, AnthropicSkill> map = new HashMap<>();
+		for (AnthropicSkill skill : values()) {
+			// Locale.ROOT keeps the key independent of the JVM's default locale.
+			map.put(skill.skillId.toLowerCase(java.util.Locale.ROOT), skill);
+		}
+		BY_ID = Collections.unmodifiableMap(map);
+	}
+
+	private final String skillId;
+
+	private final String description;
+
+	AnthropicSkill(String skillId, String description) {
+		this.skillId = skillId;
+		this.description = description;
+	}
+
+	/**
+	 * Look up a pre-built Anthropic skill by its ID, ignoring case.
+	 * @param skillId the skill ID (e.g., "xlsx", "pptx", "docx", "pdf")
+	 * @return the matching skill, or null if not found
+	 */
+	public static @Nullable AnthropicSkill fromId(@Nullable String skillId) {
+		if (skillId == null) {
+			return null;
+		}
+		return BY_ID.get(skillId.toLowerCase(java.util.Locale.ROOT));
+	}
+
+	/**
+	 * Return the skill ID, e.g. {@code "xlsx"}.
+	 */
+	public String getSkillId() {
+		return this.skillId;
+	}
+
+	/**
+	 * Return the short description of this skill.
+	 */
+	public String getDescription() {
+		return this.description;
+	}
+
+	/**
+	 * Convert to an {@link AnthropicSkillRecord} with latest version.
+	 * @return skill record
+	 */
+	public AnthropicSkillRecord toSkill() {
+		return new AnthropicSkillRecord(AnthropicSkillType.ANTHROPIC, this.skillId, "latest");
+	}
+
+	/**
+	 * Convert to an {@link AnthropicSkillRecord} with specific version.
+	 * @param version version string
+	 * @return skill record
+	 */
+	public AnthropicSkillRecord toSkill(String version) {
+		return new AnthropicSkillRecord(AnthropicSkillType.ANTHROPIC, this.skillId, version);
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillContainer.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillContainer.java
new file mode 100644
index 00000000000..09e976282e8
--- /dev/null
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillContainer.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2023-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.springframework.util.Assert;
+
+/**
+ * Unmodifiable holder for the 1 to 8 Claude Skills of a chat completion request.
+ *
+ * @author Soby Chacko
+ */
+public class AnthropicSkillContainer {
+
+	private static final int MAX_SKILLS = 8;
+
+	private final List<AnthropicSkillRecord> skills;
+
+	/**
+	 * Create a container holding a copy of the given skills.
+	 * @param skills between 1 and 8 skills, none of them null
+	 */
+	public AnthropicSkillContainer(List<AnthropicSkillRecord> skills) {
+		Assert.notNull(skills, "Skills list cannot be null");
+		Assert.notEmpty(skills, "Skills list cannot be empty");
+		Assert.noNullElements(skills, "Skills list cannot contain null elements");
+		Assert.isTrue(skills.size() <= MAX_SKILLS,
+				() -> "Maximum of " + MAX_SKILLS + " skills per request. Provided: " + skills.size());
+		this.skills = Collections.unmodifiableList(new ArrayList<>(skills));
+	}
+
+	public List<AnthropicSkillRecord> getSkills() {
+		return this.skills;
+	}
+
+	/**
+	 * Convert to a list of maps suitable for JSON serialization via
+	 * {@code JsonValue.from(Map.of("skills", container.toSkillsList()))}.
+	 * @return list of skill maps with type, skill_id, and version keys
+	 */
+	public List<Map<String, Object>> toSkillsList() {
+		return this.skills.stream().map(AnthropicSkillRecord::toJsonMap).toList();
+	}
+
+	public static Builder builder() {
+		return new Builder();
+	}
+
+	public static final class Builder {
+
+		private final List<AnthropicSkillRecord> skills = new ArrayList<>();
+
+		/**
+		 * Add a skill by ID or name; pre-built IDs (xlsx, pptx, docx, pdf) are auto-detected.
+		 * @param skillIdOrName the skill ID or name
+		 * @return this builder
+		 */
+		public Builder skill(String skillIdOrName) {
+			Assert.hasText(skillIdOrName, "Skill ID or name cannot be empty");
+			AnthropicSkill prebuilt = AnthropicSkill.fromId(skillIdOrName);
+			return this.skill((prebuilt != null) ? prebuilt.toSkill()
+					: new AnthropicSkillRecord(AnthropicSkillType.CUSTOM, skillIdOrName));
+		}
+
+		/**
+		 * Add a skill by its ID or name with a specific version.
+		 * @param skillIdOrName the skill ID or name
+		 * @param version the version (e.g., "latest", "20251013")
+		 * @return this builder
+		 */
+		public Builder skill(String skillIdOrName, String version) {
+			Assert.hasText(skillIdOrName, "Skill ID or name cannot be empty");
+			Assert.hasText(version, "Version cannot be empty");
+			AnthropicSkill prebuilt = AnthropicSkill.fromId(skillIdOrName);
+			return this.skill((prebuilt != null) ? prebuilt.toSkill(version)
+					: new AnthropicSkillRecord(AnthropicSkillType.CUSTOM, skillIdOrName, version));
+		}
+
+		/**
+		 * Add a pre-built Anthropic skill using the enum.
+		 * @param skill the Anthropic skill enum value
+		 * @return this builder
+		 */
+		public Builder skill(AnthropicSkill skill) {
+			Assert.notNull(skill, "AnthropicSkill cannot be null");
+			return this.skill(skill.toSkill());
+		}
+
+		/**
+		 * Add a pre-built Anthropic skill with a specific version.
+		 * @param skill the Anthropic skill enum value
+		 * @param version the version
+		 * @return this builder
+		 */
+		public Builder skill(AnthropicSkill skill, String version) {
+			Assert.notNull(skill, "AnthropicSkill cannot be null");
+			Assert.hasText(version, "Version cannot be empty");
+			return this.skill(skill.toSkill(version));
+		}
+
+		/**
+		 * Add a skill record directly.
+		 * @param skill the skill record
+		 * @return this builder
+		 */
+		public Builder skill(AnthropicSkillRecord skill) {
+			Assert.notNull(skill, "Skill cannot be null");
+			this.skills.add(skill);
+			return this;
+		}
+
+		/**
+		 * Add multiple skills by their IDs or names.
+		 * @param skillIds the skill IDs or names
+		 * @return this builder
+		 */
+		public Builder skills(String... skillIds) {
+			Assert.notEmpty(skillIds, "Skill IDs cannot be empty");
+			for (String skillId : skillIds) {
+				this.skill(skillId);
+			}
+			return this;
+		}
+
+		/**
+		 * Add multiple skills from a list of IDs or names.
+		 * @param skillIds the list of skill IDs or names
+		 * @return this builder
+		 */
+		public Builder skills(List<String> skillIds) {
+			Assert.notEmpty(skillIds, "Skill IDs cannot be empty");
+			skillIds.forEach(this::skill);
+			return this;
+		}
+
+		public AnthropicSkillContainer build() {
+			return new AnthropicSkillContainer(new ArrayList<>(this.skills));
+		}
+
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillRecord.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillRecord.java
new file mode 100644
index 00000000000..84355df4a0b
--- /dev/null
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillRecord.java
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2023-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic;
+
+import java.util.Map;
+
+import org.jspecify.annotations.Nullable;
+
+import org.springframework.util.Assert;
+
+/**
+ * An immutable reference to a Claude Skill - either pre-built Anthropic skill or custom
+ * skill. Skills are collections of instructions, scripts, and resources that extend
+ * Claude's capabilities for specific domains.
+ *
+ * @author Soby Chacko
+ */
+public class AnthropicSkillRecord {
+
+	private final AnthropicSkillType type;
+
+	private final String skillId;
+
+	private final String version;
+
+	/**
+	 * Create a skill with a specific version.
+	 * @param type skill type; must not be null
+	 * @param skillId skill identifier; must contain text
+	 * @param version version string (e.g., "latest", "20251013"); must contain text
+	 */
+	public AnthropicSkillRecord(AnthropicSkillType type, String skillId, String version) {
+		Assert.notNull(type, "Skill type cannot be null");
+		Assert.hasText(skillId, "Skill ID cannot be empty");
+		Assert.hasText(version, "Version cannot be empty");
+		this.type = type;
+		this.skillId = skillId;
+		this.version = version;
+	}
+
+	/**
+	 * Create a skill with default "latest" version.
+	 * @param type skill type; must not be null
+	 * @param skillId skill identifier; must contain text
+	 */
+	public AnthropicSkillRecord(AnthropicSkillType type, String skillId) {
+		this(type, skillId, "latest");
+	}
+
+	public AnthropicSkillType getType() {
+		return this.type;
+	}
+
+	public String getSkillId() {
+		return this.skillId;
+	}
+
+	public String getVersion() {
+		return this.version;
+	}
+
+	/**
+	 * Convert to an unmodifiable map suitable for JSON serialization via {@code JsonValue.from()}.
+	 * @return map with type, skill_id, and version keys
+	 */
+	public Map<String, Object> toJsonMap() {
+		return Map.of("type", this.type.getValue(), "skill_id", this.skillId, "version", this.version);
+	}
+
+	public static Builder builder() {
+		return new Builder();
+	}
+
+	public static final class Builder {
+
+		private @Nullable AnthropicSkillType type;
+
+		private @Nullable String skillId;
+
+		private String version = "latest"; // used when version(...) is never called
+
+		public Builder type(AnthropicSkillType type) {
+			this.type = type;
+			return this;
+		}
+
+		public Builder skillId(String skillId) {
+			this.skillId = skillId;
+			return this;
+		}
+
+		public Builder version(String version) {
+			this.version = version;
+			return this;
+		}
+
+		public AnthropicSkillRecord build() {
+			Assert.notNull(this.type, "Skill type cannot be null");
+			Assert.hasText(this.skillId, "Skill ID cannot be empty");
+			return new AnthropicSkillRecord(this.type, this.skillId, this.version);
+		}
+
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/package-info.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillType.java
similarity index 56%
rename from models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/package-info.java
rename to models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillType.java
index bb4687ffd7c..d686f9b0409 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/package-info.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillType.java
@@ -14,7 +14,33 @@
  * limitations under the License.
  */
 
-@NullMarked
-package org.springframework.ai.anthropic.api;
+package org.springframework.ai.anthropic;
 
-import org.jspecify.annotations.NullMarked;
+/**
+ * The two kinds of Claude Skill, each paired with the string that represents it in JSON.
+ *
+ * @author Soby Chacko
+ */
+public enum AnthropicSkillType {
+
+	/** Pre-built skills provided by Anthropic (xlsx, pptx, docx, pdf). */
+	ANTHROPIC("anthropic"),
+
+	/** Custom skills uploaded to the workspace. */
+	CUSTOM("custom");
+
+	private final String wireValue;
+
+	AnthropicSkillType(String wireValue) {
+		this.wireValue = wireValue;
+	}
+
+	/**
+	 * Return the lowercase string form of this type.
+	 * @return {@code "anthropic"} or {@code "custom"}
+	 */
+	public String getValue() {
+		return this.wireValue;
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillsResponseHelper.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillsResponseHelper.java
index f00a1c2058d..2cf3dc9dc3c 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillsResponseHelper.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicSkillsResponseHelper.java
@@ -21,14 +21,19 @@
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
 
+import com.anthropic.client.AnthropicClient;
+import com.anthropic.core.http.HttpResponse;
+import com.anthropic.models.beta.files.FileMetadata;
+import com.anthropic.models.messages.BashCodeExecutionOutputBlock;
+import com.anthropic.models.messages.BashCodeExecutionToolResultBlock;
+import com.anthropic.models.messages.CodeExecutionOutputBlock;
+import com.anthropic.models.messages.CodeExecutionToolResultBlock;
+import com.anthropic.models.messages.CodeExecutionToolResultBlockContent;
+import com.anthropic.models.messages.ContentBlock;
+import com.anthropic.models.messages.Message;
 import org.jspecify.annotations.Nullable;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
-import org.springframework.ai.anthropic.api.AnthropicApi.FileMetadata;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.util.Assert;
 
@@ -37,8 +42,9 @@
  * methods to extract file IDs, container IDs, and download files generated by Skills.
  *
  * <p>
- * This helper is specific to Anthropic's Skills API and requires the Anthropic API
- * client. It will not work with other AI providers.
+ * Unlike the RestClient module's helper, which had to crawl untyped Map/List response
+ * structures recursively to find file IDs, this SDK-based helper reads them from the
+ * SDK's typed {@link ContentBlock} variants through their direct accessor methods.
  *
  * @author Soby Chacko
  * @since 2.0.0
@@ -46,70 +52,69 @@
 public final class AnthropicSkillsResponseHelper {
 
 	private AnthropicSkillsResponseHelper() {
-		// Utility class, no instantiation
 	}
 
 	/**
 	 * Extract all file IDs from a chat response. Searches through all content blocks in
-	 * the response, including those in the underlying AnthropicApi response metadata.
-	 * @param response The chat response to search
-	 * @return List of file IDs found in the response (empty list if none found)
+	 * the underlying SDK {@link Message} stored in response metadata.
+	 * @param response the chat response to search; may be null
+	 * @return file IDs in content-block order; empty if there is no response, SDK message or file
 	 */
-	public static List<String> extractFileIds(ChatResponse response) {
+	public static List<String> extractFileIds(@Nullable ChatResponse response) {
 		if (response == null) {
 			return List.of();
 		}
 
-		List<String> fileIds = new ArrayList<>();
+		Message message = getMessageFromMetadata(response);
+		if (message == null) {
+			return List.of();
+		}
 
-		// Try to get the underlying Anthropic response from ChatResponse metadata
-		if (response.getMetadata() != null) {
-			Object anthropicResponse = response.getMetadata().get("anthropic-response");
-			if (anthropicResponse instanceof ChatCompletionResponse chatCompletionResponse) {
-				fileIds.addAll(extractFileIdsFromContentBlocks(chatCompletionResponse.content()));
+		List<String> fileIds = new ArrayList<>();
+		for (ContentBlock block : message.content()) {
+			if (block.isContainerUpload()) {
+				fileIds.add(block.asContainerUpload().fileId());
+			}
+			else if (block.isBashCodeExecutionToolResult()) {
+				extractFileIdsFromBashResult(block.asBashCodeExecutionToolResult(), fileIds);
+			}
+			else if (block.isCodeExecutionToolResult()) {
+				extractFileIdsFromCodeExecutionResult(block.asCodeExecutionToolResult(), fileIds);
 			}
 		}
-
 		return fileIds;
 	}
 
 	/**
 	 * Extract container ID from a chat response for multi-turn conversation reuse.
-	 * @param response The chat response
-	 * @return Container ID if present, null otherwise
+	 * @param response the chat response; may be null
+	 * @return the container ID of the SDK {@link Message} in the response metadata, or null if absent
 	 */
 	public static @Nullable String extractContainerId(@Nullable ChatResponse response) {
 		if (response == null) {
 			return null;
 		}
 
-		// Try to get container from ChatResponse metadata
-		Object anthropicResponse = response.getMetadata().get("anthropic-response");
-		if (anthropicResponse instanceof ChatCompletionResponse chatCompletionResponse) {
-			if (chatCompletionResponse.container() != null) {
-				return chatCompletionResponse.container().id();
-			}
+		Message message = getMessageFromMetadata(response);
+		if (message == null) {
+			return null;
 		}
 
-		return null;
+		return message.container().map(container -> container.id()).orElse(null);
 	}
 
 	/**
 	 * Download all files from a Skills response to a target directory.
-	 *
-	 * <p>
-	 * <b>Note:</b> Existing files with the same name will be overwritten. Check for file
-	 * existence before calling if overwrite protection is needed.
-	 * @param response The chat response containing file IDs
-	 * @param api The Anthropic API client to use for downloading
-	 * @param targetDir Directory to save files (must exist)
-	 * @return List of paths to saved files
+	 * @param response the chat response containing file IDs
+	 * @param client the Anthropic client to use for downloading (beta files API)
+	 * @param targetDir existing directory to save into; IllegalStateException if a name escapes it
+	 * @return list of paths to saved files (same-named existing files are overwritten)
 	 * @throws IOException if file download or saving fails
 	 */
-	public static List<Path> downloadAllFiles(ChatResponse response, AnthropicApi api, Path targetDir)
+	public static List<Path> downloadAllFiles(ChatResponse response, AnthropicClient client, Path targetDir)
 			throws IOException {
 		Assert.notNull(response, "Response cannot be null");
-		Assert.notNull(api, "AnthropicApi cannot be null");
+		Assert.notNull(client, "AnthropicClient cannot be null");
 		Assert.notNull(targetDir, "Target directory cannot be null");
 		Assert.isTrue(Files.isDirectory(targetDir), "Target path must be a directory");
 
@@ -117,79 +122,47 @@ public static List<Path> downloadAllFiles(ChatResponse response, AnthropicApi ap
 		List<Path> savedPaths = new ArrayList<>();
 
 		for (String fileId : fileIds) {
-			// Get metadata for filename
-			FileMetadata metadata = api.getFileMetadata(fileId);
-
-			// Download file
-			byte[] content = api.downloadFile(fileId);
-
-			// Save to target directory
-			Path filePath = targetDir.resolve(metadata.filename());
-			Files.write(filePath, content);
-			savedPaths.add(filePath);
+			FileMetadata metadata = client.beta().files().retrieveMetadata(fileId);
+			Path filePath = targetDir.resolve(metadata.filename()); // the file name is server-supplied
+			Assert.state(filePath.toAbsolutePath().normalize().startsWith(targetDir.toAbsolutePath().normalize()),
+					"Refusing to write outside target directory: " + metadata.filename());
+			try (HttpResponse httpResponse = client.beta().files().download(fileId)) {
+				savedPaths.add(Files.write(filePath, httpResponse.body().readAllBytes()));
+			}
 		}
 
 		return savedPaths;
 	}
 
-	/**
-	 * Extract file IDs from a list of content blocks. Searches both direct file blocks
-	 * and nested content structures (for Skills tool results).
-	 * @param contentBlocks List of content blocks to search
-	 * @return List of file IDs found
-	 */
-	private static List<String> extractFileIdsFromContentBlocks(List<ContentBlock> contentBlocks) {
-		if (contentBlocks == null || contentBlocks.isEmpty()) {
-			return List.of();
-		}
-
-		List<String> fileIds = new ArrayList<>();
-
-		for (ContentBlock block : contentBlocks) {
-			// Check direct fileId field (top-level file blocks)
-			if (block.type() == ContentBlock.Type.FILE && block.fileId() != null) {
-				fileIds.add(block.fileId());
+	private static void extractFileIdsFromBashResult(BashCodeExecutionToolResultBlock resultBlock,
+			List<String> fileIds) { // found IDs are appended to the caller's fileIds list
+		BashCodeExecutionToolResultBlock.Content content = resultBlock.content();
+		if (content.isBashCodeExecutionResultBlock()) { // other content variants are skipped
+			for (BashCodeExecutionOutputBlock outputBlock : content.asBashCodeExecutionResultBlock().content()) {
+				fileIds.add(outputBlock.fileId());
 			}
+		}
+	}
 
-			// Check nested content field (Skills tool results with complex JSON
-			// structures)
-			if (block.content() != null) {
-				fileIds.addAll(extractFileIdsFromObject(block.content()));
+	private static void extractFileIdsFromCodeExecutionResult(CodeExecutionToolResultBlock resultBlock,
+			List<String> fileIds) { // found IDs are appended to the caller's fileIds list
+		CodeExecutionToolResultBlockContent content = resultBlock.content();
+		if (content.isResultBlock()) { // other content variants are skipped
+			for (CodeExecutionOutputBlock outputBlock : content.asResultBlock().content()) {
+				fileIds.add(outputBlock.fileId());
 			}
 		}
-
-		return fileIds;
 	}
 
-	/**
-	 * Recursively extract file IDs from any object structure. Handles nested Maps and
-	 * Lists that may contain file_id fields deep in the structure (e.g., Skills
-	 * bash_code_execution_tool_result responses).
-	 * @param obj The object to search (can be Map, List, String, or other types)
-	 * @return List of file IDs found in the object structure
-	 */
-	private static List<String> extractFileIdsFromObject(Object obj) {
-		List<String> fileIds = new ArrayList<>();
-
-		if (obj instanceof Map<?, ?> map) {
-			// Check if this map has a file_id key
-			if (map.containsKey("file_id") && map.get("file_id") instanceof String fileId) {
-				fileIds.add(fileId);
-			}
-			// Recursively search all values in the map
-			for (Object value : map.values()) {
-				fileIds.addAll(extractFileIdsFromObject(value));
-			}
+	private static @Nullable Message getMessageFromMetadata(ChatResponse response) {
+		if (response.getMetadata() == null) {
+			return null;
 		}
-		else if (obj instanceof List<?> list) {
-			// Recursively search all list items
-			for (Object item : list) {
-				fileIds.addAll(extractFileIdsFromObject(item));
-			}
+		Object anthropicResponse = response.getMetadata().get("anthropic-response"); // raw SDK response
+		if (anthropicResponse instanceof Message message) {
+			return message;
 		}
-		// For String, Number, etc., there are no file_ids to extract
-
-		return fileIds;
+		return null; // entry missing or not an SDK Message
 	}
 
 }
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheBreakpointTracker.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/CacheBreakpointTracker.java
similarity index 95%
rename from models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheBreakpointTracker.java
rename to models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/CacheBreakpointTracker.java
index 458dfb484c9..455fe4efd40 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheBreakpointTracker.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/CacheBreakpointTracker.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic.api.utils;
+package org.springframework.ai.anthropic;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -24,6 +24,8 @@
  * request has its own instance.
  *
  * @author Austin Dase
+ * @author Soby Chacko
+ * @since 1.1.0
  */
 class CacheBreakpointTracker {
 
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/CacheEligibilityResolver.java
similarity index 62%
rename from models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java
rename to models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/CacheEligibilityResolver.java
index 00f16d3c0be..18fe01ec32c 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/CacheEligibilityResolver.java
@@ -14,48 +14,37 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic.api.utils;
+package org.springframework.ai.anthropic;
 
 import java.util.Map;
 import java.util.Set;
 import java.util.function.Function;
 
+import com.anthropic.models.messages.CacheControlEphemeral;
 import org.jspecify.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicCacheOptions;
-import org.springframework.ai.anthropic.api.AnthropicCacheStrategy;
-import org.springframework.ai.anthropic.api.AnthropicCacheTtl;
-import org.springframework.ai.anthropic.api.AnthropicCacheType;
 import org.springframework.ai.chat.messages.MessageType;
 import org.springframework.util.Assert;
 
 /**
  * Resolves cache eligibility for messages based on the provided
- * {@link AnthropicCacheOptions}.
- *
- * Note: Tool definition messages are always considered for caching if the strategy
- * includes system messages. The minimum content length check is not applied to tool
- * definition messages.
+ * {@link AnthropicCacheOptions}. Eligible content resolves to an SDK
+ * {@link CacheControlEphemeral} (not a raw cache control record), anything else to null.
  *
  * @author Austin Dase
  * @author Soby Chacko
  * @since 1.1.0
- **/
+ */
 public class CacheEligibilityResolver {
 
 	private static final Logger logger = LoggerFactory.getLogger(CacheEligibilityResolver.class);
 
-	// Tool definition messages are always considered for caching if the strategy
-	// includes system messages.
 	private static final MessageType TOOL_DEFINITION_MESSAGE_TYPE = MessageType.SYSTEM;
 
 	private final CacheBreakpointTracker cacheBreakpointTracker = new CacheBreakpointTracker();
 
-	private final AnthropicCacheType anthropicCacheType = AnthropicCacheType.EPHEMERAL;
-
 	private final AnthropicCacheStrategy cacheStrategy;
 
 	private final Map<MessageType, AnthropicCacheTtl> messageTypeTtl;
@@ -76,24 +65,23 @@ public CacheEligibilityResolver(AnthropicCacheStrategy cacheStrategy,
 		this.cacheEligibleMessageTypes = cacheEligibleMessageTypes;
 	}
 
-	public static CacheEligibilityResolver from(AnthropicCacheOptions anthropicCacheOptions) {
-		AnthropicCacheStrategy strategy = anthropicCacheOptions.getStrategy();
-		return new CacheEligibilityResolver(strategy, anthropicCacheOptions.getMessageTypeTtl(),
-				anthropicCacheOptions.getMessageTypeMinContentLengths(),
-				anthropicCacheOptions.getContentLengthFunction(), extractEligibleMessageTypes(strategy));
+	public static CacheEligibilityResolver from(AnthropicCacheOptions cacheOptions) {
+		AnthropicCacheStrategy strategy = cacheOptions.getStrategy();
+		return new CacheEligibilityResolver(strategy, cacheOptions.getMessageTypeTtl(),
+				cacheOptions.getMessageTypeMinContentLengths(), cacheOptions.getContentLengthFunction(),
+				extractEligibleMessageTypes(strategy)); // eligible types derive from the strategy
 	}
 
-	private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrategy anthropicCacheStrategy) {
-		return switch (anthropicCacheStrategy) {
+	private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrategy strategy) {
+		return switch (strategy) {
 			case NONE -> Set.of();
 			case SYSTEM_ONLY, SYSTEM_AND_TOOLS -> Set.of(MessageType.SYSTEM);
-			case TOOLS_ONLY -> Set.of(); // No message types cached, only tool definitions
+			case TOOLS_ONLY -> Set.of(); // no message types are cached, only tool definitions
 			case CONVERSATION_HISTORY -> Set.of(MessageType.values());
 		};
 	}
 
-	public AnthropicApi.ChatCompletionRequest.@Nullable CacheControl resolve(MessageType messageType,
-			@Nullable String content) {
+	public @Nullable CacheControlEphemeral resolve(MessageType messageType, @Nullable String content) {
 		Integer length = this.contentLengthFunction.apply(content);
 		Integer minLength = this.messageTypeMinContentLengths.get(messageType);
 		Assert.state(minLength != null, "The minimum content length of the message type must be defined");
@@ -105,18 +93,15 @@ private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrate
 			return null;
 		}
 
-		AnthropicCacheTtl anthropicCacheTtl = this.messageTypeTtl.get(messageType);
-		Assert.state(anthropicCacheTtl != null, "The message type ttl of the message type must be defined");
+		AnthropicCacheTtl cacheTtl = this.messageTypeTtl.get(messageType);
+		Assert.state(cacheTtl != null, "The message type ttl of the message type must be defined");
 
-		logger.debug("Caching enabled for messageType={}, ttl={}", messageType, anthropicCacheTtl);
+		logger.debug("Caching enabled for messageType={}, ttl={}", messageType, cacheTtl);
 
-		return this.anthropicCacheType.cacheControl(anthropicCacheTtl.getValue());
+		return CacheControlEphemeral.builder().ttl(cacheTtl.getSdkTtl()).build();
 	}
 
-	public AnthropicApi.ChatCompletionRequest.@Nullable CacheControl resolveToolCacheControl() {
-		// Tool definitions are cache-eligible for TOOLS_ONLY, SYSTEM_AND_TOOLS, and
-		// CONVERSATION_HISTORY strategies. SYSTEM_ONLY caches only system messages,
-		// relying on Anthropic's cache hierarchy to implicitly cache tools.
+	public @Nullable CacheControlEphemeral resolveToolCacheControl() {
 		if (this.cacheStrategy != AnthropicCacheStrategy.TOOLS_ONLY
 				&& this.cacheStrategy != AnthropicCacheStrategy.SYSTEM_AND_TOOLS
 				&& this.cacheStrategy != AnthropicCacheStrategy.CONVERSATION_HISTORY) {
@@ -130,12 +115,12 @@ private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrate
 			return null;
 		}
 
-		AnthropicCacheTtl anthropicCacheTtl = this.messageTypeTtl.get(TOOL_DEFINITION_MESSAGE_TYPE);
-		Assert.state(anthropicCacheTtl != null, "messageTypeTtl must contain a 'system' entry");
+		AnthropicCacheTtl cacheTtl = this.messageTypeTtl.get(TOOL_DEFINITION_MESSAGE_TYPE);
+		Assert.state(cacheTtl != null, "messageTypeTtl must contain a 'system' entry");
 
-		logger.debug("Caching enabled for tool definition, ttl={}", anthropicCacheTtl);
+		logger.debug("Caching enabled for tool definition, ttl={}", cacheTtl);
 
-		return this.anthropicCacheType.cacheControl(anthropicCacheTtl.getValue());
+		return CacheControlEphemeral.builder().ttl(cacheTtl.getSdkTtl()).build();
 	}
 
 	public boolean isCachingEnabled() {
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/Citation.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/Citation.java
index 4d80885951b..a8dc187a47d 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/Citation.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/Citation.java
@@ -18,7 +18,6 @@
 
 import org.jspecify.annotations.Nullable;
 
-import org.springframework.ai.anthropic.api.CitationDocument;
 import org.springframework.util.Assert;
 
 /**
@@ -44,23 +43,9 @@
  * start/end indices</li>
  * </ul>
  *
- * <h3>Example Usage</h3>
- *
- * <pre>{@code
- * ChatResponse response = chatModel.call(prompt);
- *
- * List<Citation> citations = (List<Citation>) response.getMetadata().get("citations");
- *
- * for (Citation citation : citations) {
- *     System.out.println("Document: " + citation.getDocumentTitle());
- *     System.out.println("Location: " + citation.getLocationDescription());
- *     System.out.println("Text: " + citation.getCitedText());
- * }
- * }</pre>
- *
  * @author Soby Chacko
  * @since 1.1.0
- * @see CitationDocument
+ * @see AnthropicCitationDocument
  */
 public final class Citation {
 
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/aot/AnthropicRuntimeHints.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/aot/AnthropicRuntimeHints.java
deleted file mode 100644
index a2a0bdca740..00000000000
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/aot/AnthropicRuntimeHints.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.aot;
-
-import org.jspecify.annotations.Nullable;
-
-import org.springframework.aot.hint.MemberCategory;
-import org.springframework.aot.hint.RuntimeHints;
-import org.springframework.aot.hint.RuntimeHintsRegistrar;
-
-import static org.springframework.ai.aot.AiRuntimeHints.findJsonAnnotatedClassesInPackage;
-
-/**
- * The AnthropicRuntimeHints class is responsible for registering runtime hints for
- * Anthropic API classes.
- *
- * @author Christian Tzolov
- * @since 1.0.0
- */
-public class AnthropicRuntimeHints implements RuntimeHintsRegistrar {
-
-	@Override
-	public void registerHints(RuntimeHints hints, @Nullable ClassLoader classLoader) {
-		var mcs = MemberCategory.values();
-
-		for (var tr : findJsonAnnotatedClassesInPackage("org.springframework.ai.anthropic")) {
-			hints.reflection().registerType(tr, mcs);
-		}
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/aot/package-info.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/aot/package-info.java
deleted file mode 100644
index b60817a9543..00000000000
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/aot/package-info.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-@NullMarked
-package org.springframework.ai.anthropic.aot;
-
-import org.jspecify.annotations.NullMarked;
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java
deleted file mode 100644
index 8ffb33fdd7d..00000000000
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java
+++ /dev/null
@@ -1,2433 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicReference;
-import java.util.function.Consumer;
-import java.util.function.Predicate;
-
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-import com.fasterxml.jackson.annotation.JsonInclude;
-import com.fasterxml.jackson.annotation.JsonInclude.Include;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.annotation.JsonSubTypes;
-import com.fasterxml.jackson.annotation.JsonTypeInfo;
-import org.jspecify.annotations.Nullable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import reactor.core.publisher.Flux;
-import reactor.core.publisher.Mono;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
-import org.springframework.ai.anthropic.api.StreamHelper.ChatCompletionResponseBuilder;
-import org.springframework.ai.model.ApiKey;
-import org.springframework.ai.model.ChatModelDescription;
-import org.springframework.ai.model.ModelOptionsUtils;
-import org.springframework.ai.model.SimpleApiKey;
-import org.springframework.ai.observation.conventions.AiProvider;
-import org.springframework.ai.retry.RetryUtils;
-import org.springframework.http.HttpHeaders;
-import org.springframework.http.HttpStatusCode;
-import org.springframework.http.MediaType;
-import org.springframework.http.ResponseEntity;
-import org.springframework.util.Assert;
-import org.springframework.util.StringUtils;
-import org.springframework.web.client.ResponseErrorHandler;
-import org.springframework.web.client.RestClient;
-import org.springframework.web.reactive.function.client.WebClient;
-
-/**
- * The Anthropic API client.
- *
- * @author Christian Tzolov
- * @author Mariusz Bernacki
- * @author Thomas Vitale
- * @author Jihoon Kim
- * @author Alexandros Pappas
- * @author Jonghoon Park
- * @author Claudio Silva Junior
- * @author Filip Hrisafov
- * @author Soby Chacko
- * @author Austin Dase
- * @since 1.0.0
- */
-public final class AnthropicApi {
-
-	private static final Logger logger = LoggerFactory.getLogger(AnthropicApi.class);
-
-	public static Builder builder() {
-		return new Builder();
-	}
-
-	public static final String PROVIDER_NAME = AiProvider.ANTHROPIC.value();
-
-	public static final String DEFAULT_BASE_URL = "https://api.anthropic.com";
-
-	public static final String DEFAULT_MESSAGE_COMPLETIONS_PATH = "/v1/messages";
-
-	public static final String FILES_PATH = "/v1/files";
-
-	public static final String DEFAULT_ANTHROPIC_VERSION = "2023-06-01";
-
-	public static final String DEFAULT_ANTHROPIC_BETA_VERSION = "tools-2024-04-04,pdfs-2024-09-25,structured-outputs-2025-11-13";
-
-	public static final String BETA_EXTENDED_CACHE_TTL = "extended-cache-ttl-2025-04-11";
-
-	public static final String BETA_SKILLS = "skills-2025-10-02";
-
-	public static final String BETA_FILES_API = "files-api-2025-04-14";
-
-	public static final String BETA_CODE_EXECUTION = "code-execution-2025-08-25";
-
-	public static final String CODE_EXECUTION_TOOL_TYPE = "code_execution_20250825";
-
-	private static final String HEADER_X_API_KEY = "x-api-key";
-
-	private static final String HEADER_ANTHROPIC_VERSION = "anthropic-version";
-
-	private static final String HEADER_ANTHROPIC_BETA = "anthropic-beta";
-
-	private static final Predicate<String> SSE_DONE_PREDICATE = "[DONE]"::equals;
-
-	private final String completionsPath;
-
-	private final RestClient restClient;
-
-	private final StreamHelper streamHelper = new StreamHelper();
-
-	private final WebClient webClient;
-
-	private final ApiKey apiKey;
-
-	/**
-	 * Create a new client api.
-	 * @param baseUrl api base URL.
-	 * @param completionsPath path to append to the base URL.
-	 * @param anthropicApiKey Anthropic api Key.
-	 * @param anthropicVersion Anthropic version.
-	 * @param restClientBuilder RestClient builder.
-	 * @param webClientBuilder WebClient builder.
-	 * @param responseErrorHandler Response error handler.
-	 * @param anthropicBetaFeatures Anthropic beta features.
-	 */
-	private AnthropicApi(String baseUrl, String completionsPath, ApiKey anthropicApiKey, String anthropicVersion,
-			RestClient.Builder restClientBuilder, WebClient.Builder webClientBuilder,
-			ResponseErrorHandler responseErrorHandler, String anthropicBetaFeatures) {
-
-		Consumer<HttpHeaders> jsonContentHeaders = headers -> {
-			headers.add(HEADER_ANTHROPIC_VERSION, anthropicVersion);
-			headers.add(HEADER_ANTHROPIC_BETA, anthropicBetaFeatures);
-			headers.setContentType(MediaType.APPLICATION_JSON);
-		};
-
-		this.completionsPath = completionsPath;
-		this.apiKey = anthropicApiKey;
-
-		this.restClient = restClientBuilder.clone()
-			.baseUrl(baseUrl)
-			.defaultHeaders(jsonContentHeaders)
-			.defaultStatusHandler(responseErrorHandler)
-			.build();
-
-		this.webClient = webClientBuilder.clone()
-			.baseUrl(baseUrl)
-			.defaultHeaders(jsonContentHeaders)
-			.defaultStatusHandler(HttpStatusCode::isError,
-					resp -> resp.bodyToMono(String.class)
-						.flatMap(it -> Mono.error(new RuntimeException(
-								"Response exception, Status: [" + resp.statusCode() + "], Body:[" + it + "]"))))
-			.build();
-	}
-
-	/**
-	 * Create a new client api.
-	 * @param completionsPath path to append to the base URL.
-	 * @param restClient RestClient instance.
-	 * @param webClient WebClient instance.
-	 * @param apiKey Anthropic api Key.
-	 */
-	public AnthropicApi(String completionsPath, RestClient restClient, WebClient webClient, ApiKey apiKey) {
-		this.completionsPath = completionsPath;
-		this.restClient = restClient;
-		this.webClient = webClient;
-		this.apiKey = apiKey;
-	}
-
-	/**
-	 * Creates a model response for the given chat conversation.
-	 * @param chatRequest The chat completion request.
-	 * @return Entity response with {@link ChatCompletionResponse} as a body and HTTP
-	 * status code and headers.
-	 */
-	public ResponseEntity<ChatCompletionResponse> chatCompletionEntity(ChatCompletionRequest chatRequest) {
-		return chatCompletionEntity(chatRequest, new HttpHeaders());
-	}
-
-	/**
-	 * Creates a model response for the given chat conversation.
-	 * @param chatRequest The chat completion request.
-	 * @param additionalHttpHeader Additional HTTP headers.
-	 * @return Entity response with {@link ChatCompletionResponse} as a body and HTTP
-	 * status code and headers.
-	 */
-	public ResponseEntity<ChatCompletionResponse> chatCompletionEntity(ChatCompletionRequest chatRequest,
-			HttpHeaders additionalHttpHeader) {
-
-		Assert.notNull(chatRequest, "The chat request must not be null");
-		Assert.isTrue(Boolean.FALSE.equals(chatRequest.stream()),
-				"The Chat request must set the stream property to false");
-		Assert.notNull(additionalHttpHeader, "Additional HTTP headers must not be null");
-
-		// @formatter:off
-		return this.restClient.post()
-			.uri(this.completionsPath)
-			.headers(headers -> {
-				headers.addAll(additionalHttpHeader);
-				addDefaultHeadersIfMissing(headers);
-			})
-			.body(chatRequest)
-			.retrieve()
-			.toEntity(ChatCompletionResponse.class);
-		// @formatter:on
-	}
-
-	/**
-	 * Creates a streaming chat response for the given chat conversation.
-	 * @param chatRequest The chat completion request. Must have the stream property set
-	 * to true.
-	 * @return Returns a {@link Flux} stream from chat completion chunks.
-	 */
-	public Flux<ChatCompletionResponse> chatCompletionStream(ChatCompletionRequest chatRequest) {
-		return chatCompletionStream(chatRequest, new HttpHeaders());
-	}
-
-	/**
-	 * Creates a streaming chat response for the given chat conversation.
-	 * @param chatRequest The chat completion request. Must have the stream property set
-	 * to true.
-	 * @param additionalHttpHeader Additional HTTP headers.
-	 * @return Returns a {@link Flux} stream from chat completion chunks.
-	 */
-	public Flux<ChatCompletionResponse> chatCompletionStream(ChatCompletionRequest chatRequest,
-			HttpHeaders additionalHttpHeader) {
-
-		Assert.notNull(chatRequest, "The chat request body must not be null.");
-		Assert.isTrue(Boolean.TRUE.equals(chatRequest.stream()),
-				"The chat request must set the stream property to true");
-		Assert.notNull(additionalHttpHeader, "The additional HTTP headers can not be null");
-
-		AtomicBoolean isInsideTool = new AtomicBoolean(false);
-
-		AtomicReference<ChatCompletionResponseBuilder> chatCompletionReference = new AtomicReference<>();
-
-		// @formatter:off
-		return this.webClient.post()
-			.uri(this.completionsPath)
-			.headers(headers -> {
-				headers.addAll(additionalHttpHeader);
-				addDefaultHeadersIfMissing(headers);
-			}) // @formatter:off
-			.body(Mono.just(chatRequest), ChatCompletionRequest.class)
-			.retrieve()
-			.bodyToFlux(String.class)
-			.takeUntil(SSE_DONE_PREDICATE)
-			.filter(SSE_DONE_PREDICATE.negate())
-			.map(content -> ModelOptionsUtils.jsonToObject(content, StreamEvent.class))
-			.filter(event -> event.type() != EventType.PING)
-			// Detect if the chunk is part of a streaming function call.
-			.map(event -> {
-				logger.debug("Received event: {}", event);
-
-				if (this.streamHelper.isToolUseStart(event)) {
-					isInsideTool.set(true);
-				}
-				return event;
-			})
-			// Group all chunks belonging to the same function call.
-			.windowUntil(event -> {
-				if (isInsideTool.get() && this.streamHelper.isToolUseFinish(event)) {
-					isInsideTool.set(false);
-					return true;
-				}
-				return !isInsideTool.get();
-			})
-			// Merging the window chunks into a single chunk.
-			.concatMapIterable(window -> {
-				Mono<StreamEvent> monoChunk = window.reduce(new ToolUseAggregationEvent(),
-						this.streamHelper::mergeToolUseEvents);
-				return List.of(monoChunk);
-			})
-			.flatMap(mono -> mono)
-			.map(event -> this.streamHelper.eventToChatCompletionResponse(event, chatCompletionReference))
-			.filter(chatCompletionResponse -> chatCompletionResponse.type() != null);
-	}
-
-	// ------------------------------------------------------------------------
-	// Files API Methods
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Get metadata for a specific file generated by Skills or uploaded via Files API.
-	 * @param fileId The file ID to retrieve (format: file_*)
-	 * @return File metadata including filename, size, mime type, and expiration
-	 */
-	public FileMetadata getFileMetadata(String fileId) {
-		Assert.hasText(fileId, "File ID cannot be empty");
-
-		return Objects.requireNonNull(this.restClient.get()
-			.uri(FILES_PATH + "/{id}", fileId)
-			.headers(headers -> {
-				addDefaultHeadersIfMissing(headers);
-				// Append files-api beta to existing beta headers if not already present
-				String existingBeta = headers.getFirst(HEADER_ANTHROPIC_BETA);
-				if (existingBeta != null && !existingBeta.contains(BETA_FILES_API)) {
-					headers.set(HEADER_ANTHROPIC_BETA, existingBeta + "," + BETA_FILES_API);
-				}
-				else if (existingBeta == null) {
-					headers.set(HEADER_ANTHROPIC_BETA, BETA_FILES_API);
-				}
-			})
-			.retrieve()
-			.body(FileMetadata.class));
-	}
-
-	/**
-	 * Download file content as byte array. Suitable for small to medium files.
-	 * @param fileId The file ID to download
-	 * @return File content as bytes
-	 */
-	public byte[] downloadFile(String fileId) {
-		Assert.hasText(fileId, "File ID cannot be empty");
-
-		return Objects.requireNonNull(this.restClient.get()
-			.uri(FILES_PATH + "/{id}/content", fileId)
-			.headers(headers -> {
-				addDefaultHeadersIfMissing(headers);
-				// Append files-api beta to existing beta headers if not already present
-				String existingBeta = headers.getFirst(HEADER_ANTHROPIC_BETA);
-				if (existingBeta != null && !existingBeta.contains(BETA_FILES_API)) {
-					headers.set(HEADER_ANTHROPIC_BETA, existingBeta + "," + BETA_FILES_API);
-				}
-				else if (existingBeta == null) {
-					headers.set(HEADER_ANTHROPIC_BETA, BETA_FILES_API);
-				}
-			})
-			.retrieve()
-			.body(byte[].class));
-	}
-
-	/**
-	 * List all files with optional pagination.
-	 * @param limit Maximum number of results per page (default 20, max 100)
-	 * @param page Pagination token from previous response
-	 * @return Paginated list of files
-	 */
-	public FilesListResponse listFiles(@Nullable Integer limit, @Nullable String page) {
-		return Objects.requireNonNull(this.restClient.get()
-			.uri(uriBuilder -> {
-				uriBuilder.path(FILES_PATH);
-				if (limit != null) {
-					uriBuilder.queryParam("limit", limit);
-				}
-				if (page != null) {
-					uriBuilder.queryParam("page", page);
-				}
-				return uriBuilder.build();
-			})
-			.retrieve()
-			.body(FilesListResponse.class));
-	}
-
-	/**
-	 * Delete a file. Files expire automatically after 24 hours, but this allows
-	 * immediate cleanup.
-	 * @param fileId The file ID to delete
-	 */
-	public void deleteFile(String fileId) {
-		Assert.hasText(fileId, "File ID cannot be empty");
-
-		this.restClient.delete().uri(FILES_PATH + "/{id}", fileId).retrieve().toBodilessEntity();
-	}
-
-	// ------------------------------------------------------------------------
-	// Private Helper Methods
-	// ------------------------------------------------------------------------
-
-	private void addDefaultHeadersIfMissing(HttpHeaders headers) {
-		if (!headers.containsHeader(HEADER_X_API_KEY)) {
-			String apiKeyValue = this.apiKey.getValue();
-			if (StringUtils.hasText(apiKeyValue)) {
-				headers.add(HEADER_X_API_KEY, apiKeyValue);
-			}
-		}
-	}
-
-	/**
-	 * Check the <a href="https://docs.anthropic.com/claude/docs/models-overview">Models
-	 * overview</a> and <a href=
-	 * "https://docs.anthropic.com/claude/docs/models-overview#model-comparison">model
-	 * comparison</a> for additional details and options.
-	 */
-	public enum ChatModel implements ChatModelDescription {
-
-		// @formatter:off
-		/**
-		 * The claude-opus-4-6 model.
-		 */
-		CLAUDE_OPUS_4_6("claude-opus-4-6"),
-
-		/**
-		 * The claude-sonnet-4-6 model.
-		 */
-		CLAUDE_SONNET_4_6("claude-sonnet-4-6"),
-
-		/**
-		 * The claude-haiku-4-5 model.
-		 */
-		CLAUDE_HAIKU_4_5("claude-haiku-4-5"),
-
-		/**
-		 * The claude-sonnet-4-5 model.
-		 */
-		CLAUDE_SONNET_4_5("claude-sonnet-4-5"),
-
-		/**
-		 * The claude-opus-4-5 model.
-		 */
-		CLAUDE_OPUS_4_5("claude-opus-4-5"),
-
-		/**
-		 * The claude-opus-4-1 model.
-		 */
-		CLAUDE_OPUS_4_1("claude-opus-4-1"),
-
-		/**
-		 * The claude-sonnet-4-0 model.
-		 */
-		CLAUDE_SONNET_4_0("claude-sonnet-4-0"),
-
-		/**
-		 * The claude-opus-4-0 model.
-		 */
-		CLAUDE_OPUS_4_0("claude-opus-4-0");
-
-		// @formatter:on
-
-		private final String value;
-
-		ChatModel(String value) {
-			this.value = value;
-		}
-
-		/**
-		 * Get the value of the model.
-		 * @return The value of the model.
-		 */
-		public String getValue() {
-			return this.value;
-		}
-
-		/**
-		 * Get the name of the model.
-		 * @return The name of the model.
-		 */
-		@Override
-		public String getName() {
-			return this.value;
-		}
-
-	}
-
-	/**
-	 * The role of the author of this message.
-	 */
-	public enum Role {
-
-		// @formatter:off
-		/**
-		 * The user role.
-		  */
-		@JsonProperty("user")
-		USER,
-
-		/**
-		 * The assistant role.
-		 */
-		@JsonProperty("assistant")
-		ASSISTANT
-		// @formatter:on
-
-	}
-
-	/**
-	 * The thinking type.
-	 */
-	public enum ThinkingType {
-
-		/**
-		 * Enabled thinking type.
-		 */
-		@JsonProperty("enabled")
-		ENABLED,
-
-		/**
-		 * Disabled thinking type.
-		 */
-		@JsonProperty("disabled")
-		DISABLED
-
-	}
-
-	/**
-	 * Types of Claude Skills.
-	 */
-	public enum SkillType {
-
-		/**
-		 * Pre-built skills provided by Anthropic (xlsx, pptx, docx, pdf).
-		 */
-		@JsonProperty("anthropic")
-		ANTHROPIC("anthropic"),
-
-		/**
-		 * Custom skills uploaded to the workspace.
-		 */
-		@JsonProperty("custom")
-		CUSTOM("custom");
-
-		private final String value;
-
-		SkillType(String value) {
-			this.value = value;
-		}
-
-		public String getValue() {
-			return this.value;
-		}
-
-	}
-
-	/**
-	 * Pre-built Anthropic Skills for document generation.
-	 */
-	public enum AnthropicSkill {
-
-		// @formatter:off
-		/**
-		 * Excel spreadsheet generation and manipulation.
-		 */
-		XLSX("xlsx", "Excel spreadsheet generation"),
-
-		/**
-		 * PowerPoint presentation creation.
-		 */
-		PPTX("pptx", "PowerPoint presentation creation"),
-
-		/**
-		 * Word document generation.
-		 */
-		DOCX("docx", "Word document generation"),
-
-		/**
-		 * PDF document creation.
-		 */
-		PDF("pdf", "PDF document creation");
-		// @formatter:on
-
-		private static final Map<String, AnthropicSkill> BY_ID;
-
-		static {
-			Map<String, AnthropicSkill> map = new HashMap<>();
-			for (AnthropicSkill skill : values()) {
-				map.put(skill.skillId.toLowerCase(), skill);
-			}
-			BY_ID = Collections.unmodifiableMap(map);
-		}
-
-		private final String skillId;
-
-		private final String description;
-
-		AnthropicSkill(String skillId, String description) {
-			this.skillId = skillId;
-			this.description = description;
-		}
-
-		/**
-		 * Look up a pre-built Anthropic skill by its ID.
-		 * @param skillId The skill ID (e.g., "xlsx", "pptx", "docx", "pdf")
-		 * @return The matching AnthropicSkill, or null if not found
-		 */
-		public static @Nullable AnthropicSkill fromId(@Nullable String skillId) {
-			if (skillId == null) {
-				return null;
-			}
-			return BY_ID.get(skillId.toLowerCase());
-		}
-
-		public String getSkillId() {
-			return this.skillId;
-		}
-
-		public String getDescription() {
-			return this.description;
-		}
-
-		/**
-		 * Convert to a Skill record with latest version.
-		 * @return Skill record
-		 */
-		public Skill toSkill() {
-			return new Skill(SkillType.ANTHROPIC, this.skillId, "latest");
-		}
-
-		/**
-		 * Convert to a Skill record with specific version.
-		 * @param version Version string
-		 * @return Skill record
-		 */
-		public Skill toSkill(String version) {
-			return new Skill(SkillType.ANTHROPIC, this.skillId, version);
-		}
-
-	}
-
-	/**
-	 * Represents a Claude Skill - either pre-built Anthropic skill or custom skill.
-	 * Skills are collections of instructions, scripts, and resources that extend Claude's
-	 * capabilities for specific domains.
-	 *
-	 * @param type The skill type: "anthropic" for pre-built skills, "custom" for uploaded
-	 * skills
-	 * @param skillId Skill identifier - short name for Anthropic skills (e.g., "xlsx",
-	 * "pptx"), generated ID for custom skills
-	 * @param version Optional version - "latest", date-based (e.g., "20251013"), or epoch
-	 * timestamp
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record Skill(@JsonProperty("type") SkillType type, @JsonProperty("skill_id") String skillId,
-			@JsonProperty("version") String version) {
-
-		/**
-		 * Create a Skill with default "latest" version.
-		 * @param type Skill type
-		 * @param skillId Skill ID
-		 */
-		public Skill(SkillType type, String skillId) {
-			this(type, skillId, "latest");
-		}
-
-		public static SkillBuilder builder() {
-			return new SkillBuilder();
-		}
-
-		public static final class SkillBuilder {
-
-			private @Nullable SkillType type;
-
-			private @Nullable String skillId;
-
-			private String version = "latest";
-
-			public SkillBuilder type(SkillType type) {
-				this.type = type;
-				return this;
-			}
-
-			public SkillBuilder skillId(String skillId) {
-				this.skillId = skillId;
-				return this;
-			}
-
-			public SkillBuilder version(String version) {
-				this.version = version;
-				return this;
-			}
-
-			public Skill build() {
-				Assert.notNull(this.type, "Skill type cannot be null");
-				Assert.hasText(this.skillId, "Skill ID cannot be empty");
-				return new Skill(this.type, this.skillId, this.version);
-			}
-
-		}
-
-	}
-
-	/**
-	 * Container for Claude Skills in a chat completion request. Maximum of 8 skills per
-	 * request.
-	 *
-	 * @param skills List of skills to make available to Claude
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record SkillContainer(@JsonProperty("skills") List<Skill> skills) {
-
-		public SkillContainer {
-			Assert.notNull(skills, "Skills list cannot be null");
-			Assert.notEmpty(skills, "Skills list cannot be empty");
-			if (skills.size() > 8) {
-				throw new IllegalArgumentException("Maximum of 8 skills per request. Provided: " + skills.size());
-			}
-		}
-
-		public static SkillContainerBuilder builder() {
-			return new SkillContainerBuilder();
-		}
-
-		public static final class SkillContainerBuilder {
-
-			private final List<Skill> skills = new ArrayList<>();
-
-			/**
-			 * Add a skill by its ID or name. Automatically detects whether it's a
-			 * pre-built Anthropic skill (xlsx, pptx, docx, pdf) or a custom skill ID.
-			 * @param skillIdOrName The skill ID or name
-			 * @return this builder
-			 */
-			public SkillContainerBuilder skill(String skillIdOrName) {
-				Assert.hasText(skillIdOrName, "Skill ID or name cannot be empty");
-				AnthropicSkill prebuilt = AnthropicSkill.fromId(skillIdOrName);
-				if (prebuilt != null) {
-					return this.skill(prebuilt.toSkill());
-				}
-				return this.skill(new Skill(SkillType.CUSTOM, skillIdOrName));
-			}
-
-			/**
-			 * Add a skill by its ID or name with a specific version.
-			 * @param skillIdOrName The skill ID or name
-			 * @param version The version (e.g., "latest", "20251013")
-			 * @return this builder
-			 */
-			public SkillContainerBuilder skill(String skillIdOrName, String version) {
-				Assert.hasText(skillIdOrName, "Skill ID or name cannot be empty");
-				Assert.hasText(version, "Version cannot be empty");
-				AnthropicSkill prebuilt = AnthropicSkill.fromId(skillIdOrName);
-				if (prebuilt != null) {
-					return this.skill(prebuilt.toSkill(version));
-				}
-				return this.skill(new Skill(SkillType.CUSTOM, skillIdOrName, version));
-			}
-
-			/**
-			 * Add a pre-built Anthropic skill using the enum.
-			 * @param skill The Anthropic skill enum value
-			 * @return this builder
-			 */
-			public SkillContainerBuilder skill(AnthropicSkill skill) {
-				Assert.notNull(skill, "AnthropicSkill cannot be null");
-				return this.skill(skill.toSkill());
-			}
-
-			/**
-			 * Add a pre-built Anthropic skill with a specific version.
-			 * @param skill The Anthropic skill enum value
-			 * @param version The version
-			 * @return this builder
-			 */
-			public SkillContainerBuilder skill(AnthropicSkill skill, String version) {
-				Assert.notNull(skill, "AnthropicSkill cannot be null");
-				Assert.hasText(version, "Version cannot be empty");
-				return this.skill(skill.toSkill(version));
-			}
-
-			/**
-			 * Add a Skill record directly.
-			 * @param skill The skill record
-			 * @return this builder
-			 */
-			public SkillContainerBuilder skill(Skill skill) {
-				Assert.notNull(skill, "Skill cannot be null");
-				this.skills.add(skill);
-				return this;
-			}
-
-			/**
-			 * Add multiple skills by their IDs or names.
-			 * @param skillIds The skill IDs or names
-			 * @return this builder
-			 */
-			public SkillContainerBuilder skills(String... skillIds) {
-				Assert.notEmpty(skillIds, "Skill IDs cannot be empty");
-				for (String skillId : skillIds) {
-					this.skill(skillId);
-				}
-				return this;
-			}
-
-			/**
-			 * Add multiple skills from a list of IDs or names.
-			 * @param skillIds The list of skill IDs or names
-			 * @return this builder
-			 */
-			public SkillContainerBuilder skills(List<String> skillIds) {
-				Assert.notEmpty(skillIds, "Skill IDs cannot be empty");
-				skillIds.forEach(this::skill);
-				return this;
-			}
-
-			/**
-			 * Add a pre-built Anthropic skill.
-			 * @param skill The Anthropic skill enum value
-			 * @return this builder
-			 * @deprecated Use {@link #skill(AnthropicSkill)} instead
-			 */
-			@Deprecated
-			public SkillContainerBuilder anthropicSkill(AnthropicSkill skill) {
-				return this.skill(skill);
-			}
-
-			/**
-			 * Add a pre-built Anthropic skill with version.
-			 * @param skill The Anthropic skill enum value
-			 * @param version The version
-			 * @return this builder
-			 * @deprecated Use {@link #skill(AnthropicSkill, String)} instead
-			 */
-			@Deprecated
-			public SkillContainerBuilder anthropicSkill(AnthropicSkill skill, String version) {
-				return this.skill(skill, version);
-			}
-
-			/**
-			 * Add a custom skill by ID.
-			 * @param skillId The custom skill ID
-			 * @return this builder
-			 * @deprecated Use {@link #skill(String)} instead
-			 */
-			@Deprecated
-			public SkillContainerBuilder customSkill(String skillId) {
-				return this.skill(skillId);
-			}
-
-			/**
-			 * Add a custom skill with version.
-			 * @param skillId The custom skill ID
-			 * @param version The version
-			 * @return this builder
-			 * @deprecated Use {@link #skill(String, String)} instead
-			 */
-			@Deprecated
-			public SkillContainerBuilder customSkill(String skillId, String version) {
-				return this.skill(skillId, version);
-			}
-
-			public SkillContainer build() {
-				return new SkillContainer(new ArrayList<>(this.skills));
-			}
-
-		}
-
-	}
-
-	// @formatter:off
-	/**
-	 * Metadata for a file generated by Claude Skills or uploaded via Files API.
-	 * Files expire after a certain period (typically 24 hours).
-	 *
-	 * @param id Unique file identifier (format: file_*)
-	 * @param filename Original filename with extension
-	 * @param size File size in bytes
-	 * @param mimeType MIME type (e.g., application/vnd.openxmlformats-officedocument.spreadsheetml.sheet)
-	 * @param createdAt When the file was created (ISO 8601 timestamp)
-	 * @param expiresAt When the file will be automatically deleted (ISO 8601 timestamp)
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record FileMetadata(
-			@JsonProperty("id") String id,
-			@JsonProperty("filename") String filename,
-			@JsonProperty("size") Long size,
-			@JsonProperty("mime_type") String mimeType,
-			@JsonProperty("created_at") String createdAt,
-			@JsonProperty("expires_at") String expiresAt) {
-	}
-
-	/**
-	 * Paginated list of files response from the Files API.
-	 *
-	 * @param data List of file metadata objects
-	 * @param hasMore Whether more results exist
-	 * @param nextPage Pagination token for next page
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record FilesListResponse(
-			@JsonProperty("data") List<FileMetadata> data,
-			@JsonProperty("has_more") Boolean hasMore,
-			@JsonProperty("next_page") String nextPage) {
-	}
-	// @formatter:on
-
-	/**
-	 * The event type of the streamed chunk.
-	 */
-	public enum EventType {
-
-		/**
-		 * Message start event. Contains a Message object with empty content.
-		 */
-		@JsonProperty("message_start")
-		MESSAGE_START,
-
-		/**
-		 * Message delta event, indicating top-level changes to the final Message object.
-		 */
-		@JsonProperty("message_delta")
-		MESSAGE_DELTA,
-
-		/**
-		 * A final message stop event.
-		 */
-		@JsonProperty("message_stop")
-		MESSAGE_STOP,
-
-		/**
-		 * Content block start event.
-		 */
-		@JsonProperty("content_block_start")
-		CONTENT_BLOCK_START,
-
-		/**
-		 * Content block delta event.
-		 */
-		@JsonProperty("content_block_delta")
-		CONTENT_BLOCK_DELTA,
-
-		/**
-		 * A final content block stop event.
-		 */
-		@JsonProperty("content_block_stop")
-		CONTENT_BLOCK_STOP,
-
-		/**
-		 * Error event.
-		 */
-		@JsonProperty("error")
-		ERROR,
-
-		/**
-		 * Ping event.
-		 */
-		@JsonProperty("ping")
-		PING,
-
-		/**
-		 * Artificially created event to aggregate tool use events.
-		 */
-		TOOL_USE_AGGREGATE
-
-	}
-
-	@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "type",
-			visible = true)
-	@JsonSubTypes({ @JsonSubTypes.Type(value = ContentBlockStartEvent.class, name = "content_block_start"),
-			@JsonSubTypes.Type(value = ContentBlockDeltaEvent.class, name = "content_block_delta"),
-			@JsonSubTypes.Type(value = ContentBlockStopEvent.class, name = "content_block_stop"),
-			@JsonSubTypes.Type(value = PingEvent.class, name = "ping"),
-			@JsonSubTypes.Type(value = ErrorEvent.class, name = "error"),
-			@JsonSubTypes.Type(value = MessageStartEvent.class, name = "message_start"),
-			@JsonSubTypes.Type(value = MessageDeltaEvent.class, name = "message_delta"),
-			@JsonSubTypes.Type(value = MessageStopEvent.class, name = "message_stop") })
-	public interface StreamEvent {
-
-		@JsonProperty("type")
-		EventType type();
-
-	}
-
-	/**
-	 * Chat completion request object.
-	 *
-	 * @param model The model that will complete your prompt. See the list of
-	 * <a href="https://docs.anthropic.com/claude/docs/models-overview">models</a> for
-	 * additional details and options.
-	 * @param messages Input messages.
-	 * @param system System prompt. Can be a String (for compatibility) or a
-	 * List&lt;ContentBlock&gt; (for caching support). A system prompt is a way of
-	 * providing context and instructions to Claude, such as specifying a particular goal
-	 * or role. See our
-	 * <a href="https://docs.anthropic.com/claude/docs/system-prompts">guide</a> to system
-	 * prompts.
-	 * @param maxTokens The maximum number of tokens to generate before stopping. Note
-	 * that our models may stop before reaching this maximum. This parameter only
-	 * specifies the absolute maximum number of tokens to generate. Different models have
-	 * different maximum values for this parameter.
-	 * @param metadata An object describing metadata about the request.
-	 * @param stopSequences Custom text sequences that will cause the model to stop
-	 * generating. Our models will normally stop when they have naturally completed their
-	 * turn, which will result in a response stop_reason of "end_turn". If you want the
-	 * model to stop generating when it encounters custom strings of text, you can use the
-	 * stop_sequences parameter. If the model encounters one of the custom sequences, the
-	 * response stop_reason value will be "stop_sequence" and the response stop_sequence
-	 * value will contain the matched stop sequence.
-	 * @param stream Whether to incrementally stream the response using server-sent
-	 * events.
-	 * @param temperature Amount of randomness injected into the response.Defaults to 1.0.
-	 * Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple
-	 * choice, and closer to 1.0 for creative and generative tasks. Note that even with
-	 * temperature of 0.0, the results will not be fully deterministic.
-	 * @param topP Use nucleus sampling. In nucleus sampling, we compute the cumulative
-	 * distribution over all the options for each subsequent token in decreasing
-	 * probability order and cut it off once it reaches a particular probability specified
-	 * by top_p. You should either alter temperature or top_p, but not both. Recommended
-	 * for advanced use cases only. You usually only need to use temperature.
-	 * @param topK Only sample from the top K options for each subsequent token. Used to
-	 * remove "long tail" low probability responses. Learn more technical details here.
-	 * Recommended for advanced use cases only. You usually only need to use temperature.
-	 * @param tools Definitions of tools that the model may use. If provided the model may
-	 * return tool_use content blocks that represent the model's use of those tools. You
-	 * can then run those tools using the tool input generated by the model and then
-	 * optionally return results back to the model using tool_result content blocks.
-	 * @param toolChoice How the model should use the provided tools. The model can use a
-	 * specific tool, any available tool, decide by itself, or not use tools at all.
-	 * @param thinking Configuration for the model's thinking mode. When enabled, the
-	 * model can perform more in-depth reasoning before responding to a query.
-	 * @param outputFormat Output format configuration for structured outputs.
-	 * @param container Container for Claude Skills configuration.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record ChatCompletionRequest(
-	// @formatter:off
-		@JsonProperty("model") String model,
-		@JsonProperty("messages") List<AnthropicMessage> messages,
-		@JsonProperty("system") @Nullable Object system,
-		@JsonProperty("max_tokens") Integer maxTokens,
-		@JsonProperty("metadata") @Nullable Metadata metadata,
-		@JsonProperty("stop_sequences") @Nullable List<String> stopSequences,
-		@JsonProperty("stream") @Nullable Boolean stream,
-		@JsonProperty("temperature") @Nullable Double temperature,
-		@JsonProperty("top_p") @Nullable Double topP,
-		@JsonProperty("top_k") @Nullable Integer topK,
-		@JsonProperty("tools") @Nullable List<Tool> tools,
-		@JsonProperty("tool_choice") @Nullable ToolChoice toolChoice,
-		@JsonProperty("thinking") @Nullable ThinkingConfig thinking,
-		@JsonProperty("output_format") @Nullable OutputFormat outputFormat,
-		@JsonProperty("container") @Nullable SkillContainer container) {
-		// @formatter:on
-
-		public ChatCompletionRequest(String model, List<AnthropicMessage> messages, @Nullable Object system,
-				Integer maxTokens, @Nullable Double temperature, @Nullable Boolean stream) {
-			this(model, messages, system, maxTokens, null, null, stream, temperature, null, null, null, null, null,
-					null, null);
-		}
-
-		public ChatCompletionRequest(String model, List<AnthropicMessage> messages, @Nullable Object system,
-				Integer maxTokens, @Nullable List<String> stopSequences, @Nullable Double temperature,
-				@Nullable Boolean stream) {
-			this(model, messages, system, maxTokens, null, stopSequences, stream, temperature, null, null, null, null,
-					null, null, null);
-		}
-
-		public static ChatCompletionRequestBuilder builder() {
-			return new ChatCompletionRequestBuilder();
-		}
-
-		public static ChatCompletionRequestBuilder from(ChatCompletionRequest request) {
-			return new ChatCompletionRequestBuilder(request);
-		}
-
-		@JsonInclude(Include.NON_NULL)
-		public record OutputFormat(@JsonProperty("type") String type,
-				@JsonProperty("schema") Map<String, Object> schema) {
-
-			public OutputFormat(String jsonSchema) {
-				this("json_schema", ModelOptionsUtils.jsonToMap(jsonSchema));
-			}
-		}
-
-		/**
-		 * Metadata about the request.
-		 *
-		 * @param userId An external identifier for the user who is associated with the
-		 * request. This should be a uuid, hash value, or other opaque identifier.
-		 * Anthropic may use this id to help detect abuse. Do not include any identifying
-		 * information such as name, email address, or phone number.
-		 */
-		@JsonInclude(Include.NON_NULL)
-		public record Metadata(@JsonProperty("user_id") String userId) {
-
-		}
-
-		/**
-		 * @param type is the cache type supported by anthropic. <a href=
-		 * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#cache-limitations">Doc</a>
-		 */
-		@JsonInclude(Include.NON_NULL)
-		public record CacheControl(@JsonProperty("type") String type, @JsonProperty("ttl") @Nullable String ttl) {
-
-			public CacheControl(String type) {
-				this(type, "5m");
-			}
-		}
-
-		/**
-		 * Configuration for the model's thinking mode.
-		 *
-		 * @param type The type of thinking mode. Currently, "enabled" is supported.
-		 * @param budgetTokens The token budget available for the thinking process. Must
-		 * be ≥1024 and less than max_tokens.
-		 */
-		@JsonInclude(Include.NON_NULL)
-		public record ThinkingConfig(@JsonProperty("type") ThinkingType type,
-				@JsonProperty("budget_tokens") Integer budgetTokens) {
-		}
-
-	}
-
-	public static final class ChatCompletionRequestBuilder {
-
-		private @Nullable String model;
-
-		private @Nullable List<AnthropicMessage> messages;
-
-		private @Nullable Object system;
-
-		private @Nullable Integer maxTokens;
-
-		private ChatCompletionRequest.@Nullable Metadata metadata;
-
-		private @Nullable List<String> stopSequences;
-
-		@Nullable private Boolean stream = false;
-
-		private @Nullable Double temperature;
-
-		private @Nullable Double topP;
-
-		private @Nullable Integer topK;
-
-		private @Nullable List<Tool> tools;
-
-		private @Nullable ToolChoice toolChoice;
-
-		private ChatCompletionRequest.@Nullable ThinkingConfig thinking;
-
-		private ChatCompletionRequest.@Nullable OutputFormat outputFormat;
-
-		private @Nullable SkillContainer container;
-
-		private ChatCompletionRequestBuilder() {
-		}
-
-		private ChatCompletionRequestBuilder(ChatCompletionRequest request) {
-			this.model = request.model;
-			this.messages = request.messages;
-			this.system = request.system;
-			this.maxTokens = request.maxTokens;
-			this.metadata = request.metadata;
-			this.stopSequences = request.stopSequences;
-			this.stream = request.stream;
-			this.temperature = request.temperature;
-			this.topP = request.topP;
-			this.topK = request.topK;
-			this.tools = request.tools;
-			this.toolChoice = request.toolChoice;
-			this.thinking = request.thinking;
-			this.outputFormat = request.outputFormat;
-			this.container = request.container;
-		}
-
-		public ChatCompletionRequestBuilder model(ChatModel model) {
-			this.model = model.getValue();
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder model(String model) {
-			this.model = model;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder messages(List<AnthropicMessage> messages) {
-			this.messages = messages;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder system(Object system) {
-			this.system = system;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder maxTokens(Integer maxTokens) {
-			this.maxTokens = maxTokens;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder metadata(ChatCompletionRequest.Metadata metadata) {
-			this.metadata = metadata;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder stopSequences(List<String> stopSequences) {
-			this.stopSequences = stopSequences;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder stream(Boolean stream) {
-			this.stream = stream;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder temperature(Double temperature) {
-			this.temperature = temperature;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder topP(Double topP) {
-			this.topP = topP;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder topK(Integer topK) {
-			this.topK = topK;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder tools(List<Tool> tools) {
-			this.tools = tools;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder toolChoice(ToolChoice toolChoice) {
-			this.toolChoice = toolChoice;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder thinking(ChatCompletionRequest.ThinkingConfig thinking) {
-			this.thinking = thinking;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder thinking(ThinkingType type, Integer budgetTokens) {
-			this.thinking = new ChatCompletionRequest.ThinkingConfig(type, budgetTokens);
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder outputFormat(ChatCompletionRequest.OutputFormat outputFormat) {
-			this.outputFormat = outputFormat;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder container(SkillContainer container) {
-			this.container = container;
-			return this;
-		}
-
-		public ChatCompletionRequestBuilder skills(@Nullable List<Skill> skills) {
-			if (skills != null && !skills.isEmpty()) {
-				this.container = new SkillContainer(skills);
-			}
-			return this;
-		}
-
-		public ChatCompletionRequest build() {
-			Assert.state(this.model != null, "model can't be null");
-			Assert.state(this.messages != null, "messages can't be null");
-			Assert.state(this.maxTokens != null, "maxTokens can't be null");
-
-			return new ChatCompletionRequest(this.model, this.messages, this.system, this.maxTokens, this.metadata,
-					this.stopSequences, this.stream, this.temperature, this.topP, this.topK, this.tools,
-					this.toolChoice, this.thinking, this.outputFormat, this.container);
-		}
-
-	}
-
-	///////////////////////////////////////
-	/// ERROR EVENT
-	///////////////////////////////////////
-
-	/**
-	 * Input messages.
-	 *
-	 * Our models are trained to operate on alternating user and assistant conversational
-	 * turns. When creating a new Message, you specify the prior conversational turns with
-	 * the messages parameter, and the model then generates the next Message in the
-	 * conversation. Each input message must be an object with a role and content. You can
-	 * specify a single user-role message, or you can include multiple user and assistant
-	 * messages. The first message must always use the user role. If the final message
-	 * uses the assistant role, the response content will continue immediately from the
-	 * content in that message. This can be used to constrain part of the model's
-	 * response.
-	 *
-	 * @param content The contents of the message. Can be of one of String or
-	 * MultiModalContent.
-	 * @param role The role of the messages author. Could be one of the {@link Role}
-	 * types.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record AnthropicMessage(
-	// @formatter:off
-		@JsonProperty("content") List<ContentBlock> content,
-		@JsonProperty("role") Role role) {
-		// @formatter:on
-	}
-
-	/**
-	 * Citations configuration for document ContentBlocks.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record CitationsConfig(@JsonProperty("enabled") Boolean enabled) {
-	}
-
-	/**
-	 * Citation response structure from Anthropic API. Maps to the actual API response
-	 * format for citations. Contains location information that varies by document type:
-	 * character indices for plain text, page numbers for PDFs, or content block indices
-	 * for custom content.
-	 *
-	 * @param type The citation location type ("char_location", "page_location", or
-	 * "content_block_location")
-	 * @param citedText The text that was cited from the document
-	 * @param documentIndex The index of the document that was cited (0-based)
-	 * @param documentTitle The title of the document that was cited
-	 * @param startCharIndex The starting character index for "char_location" type
-	 * (0-based, inclusive)
-	 * @param endCharIndex The ending character index for "char_location" type (exclusive)
-	 * @param startPageNumber The starting page number for "page_location" type (1-based,
-	 * inclusive)
-	 * @param endPageNumber The ending page number for "page_location" type (exclusive)
-	 * @param startBlockIndex The starting content block index for
-	 * "content_block_location" type (0-based, inclusive)
-	 * @param endBlockIndex The ending content block index for "content_block_location"
-	 * type (exclusive)
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record CitationResponse(@JsonProperty("type") String type, @JsonProperty("cited_text") String citedText,
-			@JsonProperty("document_index") Integer documentIndex,
-			@JsonProperty("document_title") @Nullable String documentTitle,
-
-			// For char_location type
-			@JsonProperty("start_char_index") @Nullable Integer startCharIndex,
-			@JsonProperty("end_char_index") @Nullable Integer endCharIndex,
-
-			// For page_location type
-			@JsonProperty("start_page_number") @Nullable Integer startPageNumber,
-			@JsonProperty("end_page_number") @Nullable Integer endPageNumber,
-
-			// For content_block_location type
-			@JsonProperty("start_block_index") @Nullable Integer startBlockIndex,
-			@JsonProperty("end_block_index") @Nullable Integer endBlockIndex) {
-	}
-
-	/**
-	 * The content block of the message.
-	 *
-	 * @param type the content type can be "text", "image", "tool_use", "tool_result" or
-	 * "text_delta".
-	 * @param source The source of the media content. Applicable for "image" types only.
-	 * @param text The text of the message. Applicable for "text" types only.
-	 * @param index The index of the content block. Applicable only for streaming
-	 * responses.
-	 * @param id The id of the tool use. Applicable only for tool_use response.
-	 * @param name The name of the tool use. Applicable only for tool_use response.
-	 * @param input The input of the tool use. Applicable only for tool_use response.
-	 * @param toolUseId The id of the tool use. Applicable only for tool_result response.
-	 * @param content The content of the tool result. Applicable only for tool_result
-	 * response.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record ContentBlock(
-	// @formatter:off
-		@JsonProperty("type") Type type,
-		@JsonProperty("source") @Nullable Source source,
-		@JsonProperty("text") @Nullable String text,
-
-		// applicable only for streaming responses.
-		@JsonProperty("index") @Nullable Integer index,
-
-		// tool_use response only
-		@JsonProperty("id") @Nullable String id,
-		@JsonProperty("name") @Nullable String name,
-		@JsonProperty("input") @Nullable Map<String, Object> input,
-
-		// tool_result response only
-		@JsonProperty("tool_use_id") @Nullable String toolUseId,
-		@JsonProperty("content") @Nullable Object content,
-
-		// Thinking only
-		@JsonProperty("signature") @Nullable String signature,
-		@JsonProperty("thinking") @Nullable String thinking,
-
-		// Redacted Thinking only
-		@JsonProperty("data") @Nullable String data,
-
-		// cache object
-		@JsonProperty("cache_control") @Nullable CacheControl cacheControl,
-
-		// Citation fields
-		@JsonProperty("title") @Nullable String title,
-		@JsonProperty("context") @Nullable String context,
-		@JsonProperty("citations") @Nullable Object citations, // Can be CitationsConfig for requests or List<CitationResponse> for responses
-
-		// File fields (for Skills-generated files)
-		@JsonProperty("file_id") @Nullable String fileId,
-		@JsonProperty("filename") @Nullable String filename
-	) {
-		// @formatter:on
-
-		/**
-		 * Create content block
-		 * @param mediaType The media type of the content.
-		 * @param data The content data.
-		 */
-		public ContentBlock(String mediaType, String data) {
-			this(new Source(mediaType, data));
-		}
-
-		/**
-		 * Create content block
-		 * @param type The type of the content.
-		 * @param source The source of the content.
-		 */
-		public ContentBlock(Type type, Source source) {
-			this(type, source, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null,
-					null);
-		}
-
-		/**
-		 * Create content block
-		 * @param source The source of the content.
-		 */
-		public ContentBlock(Source source) {
-			this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null, null, null, null, null,
-					null, null);
-		}
-
-		/**
-		 * Create content block
-		 * @param text The text of the content.
-		 */
-		public ContentBlock(@Nullable String text) {
-			this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, null, null, null, null,
-					null, null);
-		}
-
-		public ContentBlock(@Nullable String text, @Nullable CacheControl cache) {
-			this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, cache, null, null, null,
-					null, null);
-		}
-
-		// Tool result
-		/**
-		 * Create content block
-		 * @param type The type of the content.
-		 * @param toolUseId The id of the tool use.
-		 * @param content The content of the tool result.
-		 */
-		public ContentBlock(Type type, String toolUseId, Object content) {
-			this(type, null, null, null, null, null, null, toolUseId, content, null, null, null, null, null, null, null,
-					null, null);
-		}
-
-		/**
-		 * Create content block
-		 * @param type The type of the content.
-		 * @param source The source of the content.
-		 * @param text The text of the content.
-		 * @param index The index of the content block.
-		 */
-		public ContentBlock(Type type, @Nullable Source source, String text, Integer index) {
-			this(type, source, text, index, null, null, null, null, null, null, null, null, null, null, null, null,
-					null, null);
-		}
-
-		// Tool use input JSON delta streaming
-		/**
-		 * Create content block
-		 * @param type The type of the content.
-		 * @param id The id of the tool use.
-		 * @param name The name of the tool use.
-		 * @param input The input of the tool use.
-		 */
-		public ContentBlock(Type type, @Nullable String id, @Nullable String name, Map<String, Object> input) {
-			this(type, null, null, null, id, name, input, null, null, null, null, null, null, null, null, null, null,
-					null);
-		}
-
-		/**
-		 * Create a document ContentBlock with citations and optional caching.
-		 * @param source The document source
-		 * @param title Optional document title
-		 * @param context Optional document context
-		 * @param citationsEnabled Whether citations are enabled
-		 * @param cacheControl Optional cache control (can be null)
-		 */
-		public ContentBlock(Source source, @Nullable String title, @Nullable String context, boolean citationsEnabled,
-				@Nullable CacheControl cacheControl) {
-			this(Type.DOCUMENT, source, null, null, null, null, null, null, null, null, null, null, cacheControl, title,
-					context, citationsEnabled ? new CitationsConfig(true) : null, null, null);
-		}
-
-		public static ContentBlockBuilder from(ContentBlock contentBlock) {
-			return new ContentBlockBuilder(contentBlock);
-		}
-
-		/**
-		 * The ContentBlock type.
-		 */
-		public enum Type {
-
-			/**
-			 * Tool request
-			 */
-			@JsonProperty("tool_use")
-			TOOL_USE("tool_use"),
-
-			/**
-			 * Send tool result back to LLM.
-			 */
-			@JsonProperty("tool_result")
-			TOOL_RESULT("tool_result"),
-
-			/**
-			 * Text message.
-			 */
-			@JsonProperty("text")
-			TEXT("text"),
-
-			/**
-			 * Text delta message. Returned from the streaming response.
-			 */
-			@JsonProperty("text_delta")
-			TEXT_DELTA("text_delta"),
-
-			/**
-			 * When using extended thinking with streaming enabled, you'll receive
-			 * thinking content via thinking_delta events. These deltas correspond to the
-			 * thinking field of the thinking content blocks.
-			 */
-			@JsonProperty("thinking_delta")
-			THINKING_DELTA("thinking_delta"),
-
-			/**
-			 * For thinking content, a special signature_delta event is sent just before
-			 * the content_block_stop event. This signature is used to verify the
-			 * integrity of the thinking block.
-			 */
-			@JsonProperty("signature_delta")
-			SIGNATURE_DELTA("signature_delta"),
-
-			/**
-			 * Tool use input partial JSON delta streaming.
-			 */
-			@JsonProperty("input_json_delta")
-			INPUT_JSON_DELTA("input_json_delta"),
-
-			/**
-			 * Image message.
-			 */
-			@JsonProperty("image")
-			IMAGE("image"),
-
-			/**
-			 * Document message.
-			 */
-			@JsonProperty("document")
-			DOCUMENT("document"),
-
-			/**
-			 * Thinking message.
-			 */
-			@JsonProperty("thinking")
-			THINKING("thinking"),
-
-			/**
-			 * Redacted Thinking message.
-			 */
-			@JsonProperty("redacted_thinking")
-			REDACTED_THINKING("redacted_thinking"),
-
-			/**
-			 * File content block representing a file generated by Skills. Used in
-			 * {@link org.springframework.ai.anthropic.AnthropicSkillsResponseHelper} to
-			 * extract file IDs for downloading generated documents.
-			 */
-			@JsonProperty("file")
-			FILE("file"),
-
-			/**
-			 * Bash code execution tool result returned in Skills responses. Observed in
-			 * actual API responses where file IDs are nested within this content block.
-			 * Required for JSON deserialization.
-			 */
-			@JsonProperty("bash_code_execution_tool_result")
-			BASH_CODE_EXECUTION_TOOL_RESULT("bash_code_execution_tool_result"),
-
-			/**
-			 * Text editor code execution tool result returned in Skills responses.
-			 * Observed in actual API responses. Required for JSON deserialization.
-			 */
-			@JsonProperty("text_editor_code_execution_tool_result")
-			TEXT_EDITOR_CODE_EXECUTION_TOOL_RESULT("text_editor_code_execution_tool_result"),
-
-			/**
-			 * Server-side tool use returned in Skills responses. Observed in actual API
-			 * responses when Skills invoke server-side tools. Required for JSON
-			 * deserialization.
-			 */
-			@JsonProperty("server_tool_use")
-			SERVER_TOOL_USE("server_tool_use");
-
-			public final String value;
-
-			Type(String value) {
-				this.value = value;
-			}
-
-			/**
-			 * Get the value of the type.
-			 * @return The value of the type.
-			 */
-			public String getValue() {
-				return this.value;
-			}
-
-		}
-
-		/**
-		 * The source of the media content. (Applicable for "image" types only)
-		 *
-		 * @param type The type of the media content. Only "base64" is supported at the
-		 * moment.
-		 * @param mediaType The media type of the content. For example, "image/png" or
-		 * "image/jpeg".
-		 * @param data The base64-encoded data of the content.
-		 */
-		@JsonInclude(Include.NON_NULL)
-		public record Source(
-		// @formatter:off
-			@JsonProperty("type") String type,
-			@JsonProperty("media_type") @Nullable String mediaType,
-			@JsonProperty("data") @Nullable String data,
-			@JsonProperty("url") @Nullable String url,
-			@JsonProperty("content") @Nullable List<ContentBlock> content) {
-			// @formatter:on
-
-			/**
-			 * Create source
-			 * @param mediaType The media type of the content.
-			 * @param data The content data.
-			 */
-			public Source(String mediaType, String data) {
-				this("base64", mediaType, data, null, null);
-			}
-
-			public Source(String url) {
-				this("url", null, null, url, null);
-			}
-
-			public Source(List<ContentBlock> content) {
-				this("content", null, null, null, content);
-			}
-
-		}
-
-		public static class ContentBlockBuilder {
-
-			private Type type;
-
-			private @Nullable Source source;
-
-			private @Nullable String text;
-
-			private @Nullable Integer index;
-
-			private @Nullable String id;
-
-			private @Nullable String name;
-
-			private @Nullable Map<String, Object> input;
-
-			private @Nullable String toolUseId;
-
-			private @Nullable Object content;
-
-			private @Nullable String signature;
-
-			private @Nullable String thinking;
-
-			private @Nullable String data;
-
-			private @Nullable CacheControl cacheControl;
-
-			private @Nullable String title;
-
-			private @Nullable String context;
-
-			private @Nullable Object citations;
-
-			public ContentBlockBuilder(ContentBlock contentBlock) {
-				this.type = contentBlock.type;
-				this.source = contentBlock.source;
-				this.text = contentBlock.text;
-				this.index = contentBlock.index;
-				this.id = contentBlock.id;
-				this.name = contentBlock.name;
-				this.input = contentBlock.input;
-				this.toolUseId = contentBlock.toolUseId;
-				this.content = contentBlock.content;
-				this.signature = contentBlock.signature;
-				this.thinking = contentBlock.thinking;
-				this.data = contentBlock.data;
-				this.cacheControl = contentBlock.cacheControl;
-				this.title = contentBlock.title;
-				this.context = contentBlock.context;
-				this.citations = contentBlock.citations;
-			}
-
-			public ContentBlockBuilder type(Type type) {
-				this.type = type;
-				return this;
-			}
-
-			public ContentBlockBuilder source(Source source) {
-				this.source = source;
-				return this;
-			}
-
-			public ContentBlockBuilder text(String text) {
-				this.text = text;
-				return this;
-			}
-
-			public ContentBlockBuilder index(Integer index) {
-				this.index = index;
-				return this;
-			}
-
-			public ContentBlockBuilder id(String id) {
-				this.id = id;
-				return this;
-			}
-
-			public ContentBlockBuilder name(String name) {
-				this.name = name;
-				return this;
-			}
-
-			public ContentBlockBuilder input(Map<String, Object> input) {
-				this.input = input;
-				return this;
-			}
-
-			public ContentBlockBuilder toolUseId(String toolUseId) {
-				this.toolUseId = toolUseId;
-				return this;
-			}
-
-			public ContentBlockBuilder content(Object content) {
-				this.content = content;
-				return this;
-			}
-
-			public ContentBlockBuilder signature(String signature) {
-				this.signature = signature;
-				return this;
-			}
-
-			public ContentBlockBuilder thinking(String thinking) {
-				this.thinking = thinking;
-				return this;
-			}
-
-			public ContentBlockBuilder data(String data) {
-				this.data = data;
-				return this;
-			}
-
-			public ContentBlockBuilder cacheControl(CacheControl cacheControl) {
-				this.cacheControl = cacheControl;
-				return this;
-			}
-
-			public ContentBlock build() {
-				return new ContentBlock(this.type, this.source, this.text, this.index, this.id, this.name, this.input,
-						this.toolUseId, this.content, this.signature, this.thinking, this.data, this.cacheControl,
-						this.title, this.context, this.citations, null, null);
-			}
-
-		}
-	}
-
-	///////////////////////////////////////
-	/// CONTENT_BLOCK EVENTS
-	///////////////////////////////////////
-
-	/**
-	 * Tool description.
-	 *
-	 * @param type The type of the tool (e.g., "code_execution_20250825" for code
-	 * execution).
-	 * @param name The name of the tool.
-	 * @param description A description of the tool.
-	 * @param inputSchema The input schema of the tool.
-	 * @param cacheControl Optional cache control for this tool.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record Tool(
-	// @formatter:off
-		@JsonProperty("type") @Nullable String type,
-		@JsonProperty("name") String name,
-		@JsonProperty("description") @Nullable String description,
-		@JsonProperty("input_schema") @Nullable Map<String, Object> inputSchema,
-		@JsonProperty("cache_control") @Nullable CacheControl cacheControl) {
-		// @formatter:on
-
-		/**
-		 * Constructor for backward compatibility without type or cache control.
-		 */
-		public Tool(String name, String description, Map<String, Object> inputSchema) {
-			this(null, name, description, inputSchema, null);
-		}
-
-		/**
-		 * Constructor for backward compatibility without cache control.
-		 */
-		public Tool(String type, String name, @Nullable String description, @Nullable Map<String, Object> inputSchema) {
-			this(type, name, description, inputSchema, null);
-		}
-
-	}
-
-	/**
-	 * Base interface for tool choice options.
-	 */
-	@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "type",
-			visible = true)
-	@JsonSubTypes({ @JsonSubTypes.Type(value = ToolChoiceAuto.class, name = "auto"),
-			@JsonSubTypes.Type(value = ToolChoiceAny.class, name = "any"),
-			@JsonSubTypes.Type(value = ToolChoiceTool.class, name = "tool"),
-			@JsonSubTypes.Type(value = ToolChoiceNone.class, name = "none") })
-	public interface ToolChoice {
-
-		@JsonProperty("type")
-		String type();
-
-	}
-
-	/**
-	 * Auto tool choice - the model will automatically decide whether to use tools.
-	 *
-	 * @param type The type of tool choice, always "auto".
-	 * @param disableParallelToolUse Whether to disable parallel tool use. Defaults to
-	 * false. If set to true, the model will output at most one tool use.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record ToolChoiceAuto(@JsonProperty("type") String type,
-			@JsonProperty("disable_parallel_tool_use") @Nullable Boolean disableParallelToolUse) implements ToolChoice {
-
-		/**
-		 * Create an auto tool choice with default settings.
-		 */
-		public ToolChoiceAuto() {
-			this("auto", null);
-		}
-
-		/**
-		 * Create an auto tool choice with specific parallel tool use setting.
-		 * @param disableParallelToolUse Whether to disable parallel tool use.
-		 */
-		public ToolChoiceAuto(Boolean disableParallelToolUse) {
-			this("auto", disableParallelToolUse);
-		}
-
-	}
-
-	/**
-	 * Any tool choice - the model will use any available tools.
-	 *
-	 * @param type The type of tool choice, always "any".
-	 * @param disableParallelToolUse Whether to disable parallel tool use. Defaults to
-	 * false. If set to true, the model will output exactly one tool use.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record ToolChoiceAny(@JsonProperty("type") String type,
-			@JsonProperty("disable_parallel_tool_use") @Nullable Boolean disableParallelToolUse) implements ToolChoice {
-
-		/**
-		 * Create an any tool choice with default settings.
-		 */
-		public ToolChoiceAny() {
-			this("any", null);
-		}
-
-		/**
-		 * Create an any tool choice with specific parallel tool use setting.
-		 * @param disableParallelToolUse Whether to disable parallel tool use.
-		 */
-		public ToolChoiceAny(Boolean disableParallelToolUse) {
-			this("any", disableParallelToolUse);
-		}
-
-	}
-
-	/**
-	 * Tool choice - the model will use the specified tool.
-	 *
-	 * @param type The type of tool choice, always "tool".
-	 * @param name The name of the tool to use.
-	 * @param disableParallelToolUse Whether to disable parallel tool use. Defaults to
-	 * false. If set to true, the model will output exactly one tool use.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record ToolChoiceTool(@JsonProperty("type") String type, @JsonProperty("name") String name,
-			@JsonProperty("disable_parallel_tool_use") @Nullable Boolean disableParallelToolUse) implements ToolChoice {
-
-		/**
-		 * Create a tool choice for a specific tool.
-		 * @param name The name of the tool to use.
-		 */
-		public ToolChoiceTool(String name) {
-			this("tool", name, null);
-		}
-
-		/**
-		 * Create a tool choice for a specific tool with parallel tool use setting.
-		 * @param name The name of the tool to use.
-		 * @param disableParallelToolUse Whether to disable parallel tool use.
-		 */
-		public ToolChoiceTool(String name, Boolean disableParallelToolUse) {
-			this("tool", name, disableParallelToolUse);
-		}
-
-	}
-
-	/**
-	 * None tool choice - the model will not be allowed to use tools.
-	 *
-	 * @param type The type of tool choice, always "none".
-	 */
-	@JsonInclude(Include.NON_NULL)
-	public record ToolChoiceNone(@JsonProperty("type") String type) implements ToolChoice {
-
-		/**
-		 * Create a none tool choice.
-		 */
-		public ToolChoiceNone() {
-			this("none");
-		}
-
-	}
-
-	// CB START EVENT
-
-	/**
-	 * Chat completion response object.
-	 *
-	 * @param id Unique object identifier. The format and length of IDs may change over
-	 * time.
-	 * @param type Object type. For Messages, this is always "message".
-	 * @param role Conversational role of the generated message. This will always be
-	 * "assistant".
-	 * @param content Content generated by the model. This is an array of content blocks.
-	 * @param model The model that handled the request.
-	 * @param stopReason The reason the model stopped generating tokens. This will be one
-	 * of "end_turn", "max_tokens", "stop_sequence", "tool_use", or "timeout".
-	 * @param stopSequence Which custom stop sequence was generated, if any.
-	 * @param usage Input and output token usage.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record ChatCompletionResponse(
-	// @formatter:off
-		@SuppressWarnings("NullAway.Init") @JsonProperty("id") String id,
-		@JsonProperty("type") @Nullable String type,
-		@JsonProperty("role") @Nullable Role role,
-		@SuppressWarnings("NullAway.Init") @JsonProperty("content") List<ContentBlock> content,
-		@SuppressWarnings("NullAway.Init") @JsonProperty("model") String model,
-		@JsonProperty("stop_reason") @Nullable String stopReason,
-		@JsonProperty("stop_sequence") @Nullable String stopSequence,
-		@JsonProperty("usage") @Nullable Usage usage,
-		@JsonProperty("container") @Nullable Container container) {
-		// @formatter:on
-
-		/**
-		 * Container information for Skills execution context. Contains container_id that
-		 * can be reused across multi-turn conversations.
-		 *
-		 * @param id Container identifier (format: container_*)
-		 */
-		@JsonInclude(Include.NON_NULL)
-		public record Container(@JsonProperty("id") String id) {
-		}
-	}
-
-	// CB DELTA EVENT
-
-	/**
-	 * Usage statistics.
-	 *
-	 * @param inputTokens The number of input tokens which were used.
-	 * @param outputTokens The number of output tokens which were used. completion).
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record Usage(
-	// @formatter:off
-		@JsonProperty("input_tokens") @Nullable Integer inputTokens,
-		@JsonProperty("output_tokens") @Nullable Integer outputTokens,
-		@JsonProperty("cache_creation_input_tokens") Integer cacheCreationInputTokens,
-		@JsonProperty("cache_read_input_tokens") Integer cacheReadInputTokens) {
-		// @formatter:off
-	}
-
-	 /// ECB STOP
-
-	/**
-	 * Special event used to aggregate multiple tool use events into a single event with
-	 * list of aggregated ContentBlockToolUse.
-	*/
-	public static class ToolUseAggregationEvent implements StreamEvent {
-
-		private @Nullable Integer index;
-
-		private @Nullable String id;
-
-		private @Nullable String name;
-
-		private String partialJson = "";
-
-		private List<ContentBlockStartEvent.ContentBlockToolUse> toolContentBlocks = new ArrayList<>();
-
-		@Override
-		public EventType type() {
-			return EventType.TOOL_USE_AGGREGATE;
-		}
-
-		/**
-		  * Get tool content blocks.
-		  * @return The tool content blocks.
-		*/
-		public List<ContentBlockStartEvent.ContentBlockToolUse> getToolContentBlocks() {
-			return this.toolContentBlocks;
-		}
-
-		/**
-		  * Check if the event is empty.
-		  * @return True if the event is empty, false otherwise.
-		*/
-		public boolean isEmpty() {
-			return (this.index == null || this.id == null || this.name == null);
-		}
-
-		ToolUseAggregationEvent withIndex(@Nullable Integer index) {
-			this.index = index;
-			return this;
-		}
-
-		ToolUseAggregationEvent withId(@Nullable String id) {
-			this.id = id;
-			return this;
-		}
-
-		ToolUseAggregationEvent withName(@Nullable String name) {
-			this.name = name;
-			return this;
-		}
-
-		ToolUseAggregationEvent appendPartialJson(String partialJson) {
-			this.partialJson = this.partialJson + partialJson;
-			return this;
-		}
-
-		void squashIntoContentBlock() {
-			Map<String, Object> map = (StringUtils.hasText(this.partialJson))
-					? ModelOptionsUtils.jsonToMap(this.partialJson) : Map.of();
-			this.toolContentBlocks.add(new ContentBlockStartEvent.ContentBlockToolUse("tool_use", this.id, this.name, map));
-			this.index = null;
-			this.id = null;
-			this.name = null;
-			this.partialJson = "";
-		}
-
-		@Override
-		public String toString() {
-			return "EventToolUseBuilder [index=" + this.index + ", id=" + this.id + ", name=" + this.name + ", partialJson="
-					+ this.partialJson + ", toolUseMap=" + this.toolContentBlocks + "]";
-		}
-
-	}
-
-	 ///////////////////////////////////////
-	 /// MESSAGE EVENTS
-	 ///////////////////////////////////////
-
-	 // MESSAGE START EVENT
-
-	/**
-	 * Content block start event.
-	 * @param type The event type.
-	 * @param index The index of the content block.
-	 * @param contentBlock The content block body.
-	*/
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record ContentBlockStartEvent(
-			// @formatter:off
-		@JsonProperty("type") EventType type,
-		@JsonProperty("index") Integer index,
-		@JsonProperty("content_block") ContentBlockBody contentBlock) implements StreamEvent {
-
-		@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "type",
-				visible = true)
-		@JsonSubTypes({
-				@JsonSubTypes.Type(value = ContentBlockToolUse.class, name = "tool_use"),
-				@JsonSubTypes.Type(value = ContentBlockText.class, name = "text"),
-				@JsonSubTypes.Type(value = ContentBlockThinking.class, name = "thinking")
-		})
-		public interface ContentBlockBody {
-			String type();
-		}
-
-		/**
-		  * Tool use content block.
-		  * @param type The content block type.
-		  * @param id The tool use id.
-		  * @param name The tool use name.
-		  * @param input The tool use input.
-		*/
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record ContentBlockToolUse(
-			@JsonProperty("type") String type,
-			@JsonProperty("id") @Nullable String id,
-			@JsonProperty("name") @Nullable String name,
-			@JsonProperty("input") Map<String, Object> input) implements ContentBlockBody {
-		}
-
-		/**
-		  * Text content block.
-		  * @param type The content block type.
-		  * @param text The text content.
-		*/
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record ContentBlockText(
-			@JsonProperty("type") String type,
-			@JsonProperty("text") String text) implements ContentBlockBody {
-		}
-
-		/**
-		 * Thinking content block.
-		 * @param type The content block type.
-		 * @param thinking The thinking content.
-		 */
-		@JsonInclude(Include.NON_NULL)
-		public record ContentBlockThinking(
-			@JsonProperty("type") String type,
-			@JsonProperty("thinking") String thinking,
-			@JsonProperty("signature") String signature) implements ContentBlockBody {
-		}
-	}
-	// @formatter:on
-
-	// MESSAGE DELTA EVENT
-
-	/**
-	 * Content block delta event.
-	 *
-	 * @param type The event type.
-	 * @param index The index of the content block.
-	 * @param delta The content block delta body.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record ContentBlockDeltaEvent(
-	// @formatter:off
-		@JsonProperty("type") EventType type,
-		@JsonProperty("index") Integer index,
-		@JsonProperty("delta") ContentBlockDeltaBody delta) implements StreamEvent {
-
-		@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "type",
-				visible = true)
-		@JsonSubTypes({ @JsonSubTypes.Type(value = ContentBlockDeltaText.class, name = "text_delta"),
-				@JsonSubTypes.Type(value = ContentBlockDeltaJson.class, name = "input_json_delta"),
-				@JsonSubTypes.Type(value = ContentBlockDeltaThinking.class, name = "thinking_delta"),
-				@JsonSubTypes.Type(value = ContentBlockDeltaSignature.class, name = "signature_delta")
-		})
-		public interface ContentBlockDeltaBody {
-			String type();
-		}
-
-		/**
-		 * Text content block delta.
-		 * @param type The content block type.
-		 * @param text The text content.
-		*/
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record ContentBlockDeltaText(
-			@JsonProperty("type") String type,
-			@JsonProperty("text") String text) implements ContentBlockDeltaBody {
-		}
-
-		/**
-		  * JSON content block delta.
-		  * @param type The content block type.
-		  * @param partialJson The partial JSON content.
-		  */
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record ContentBlockDeltaJson(
-			@JsonProperty("type") String type,
-			@JsonProperty("partial_json") String partialJson) implements ContentBlockDeltaBody {
-		}
-
-		/**
-		 * Thinking content block delta.
-		 * @param type The content block type.
-		 * @param thinking The thinking content.
-		 */
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record ContentBlockDeltaThinking(
-			@JsonProperty("type") String type,
-			@JsonProperty("thinking") String thinking) implements ContentBlockDeltaBody {
-		}
-
-		/**
-		 * Signature content block delta.
-		 * @param type The content block type.
-		 * @param signature The signature content.
-		 */
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record ContentBlockDeltaSignature(
-			@JsonProperty("type") String type,
-			@JsonProperty("signature") String signature) implements ContentBlockDeltaBody {
-		}
-	}
-	// @formatter:on
-
-	// MESSAGE STOP EVENT
-
-	/**
-	 * Content block stop event.
-	 *
-	 * @param type The event type.
-	 * @param index The index of the content block.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record ContentBlockStopEvent(
-	// @formatter:off
-		@JsonProperty("type") EventType type,
-		@JsonProperty("index") Integer index) implements StreamEvent {
-	}
-	// @formatter:on
-
-	/**
-	 * Message start event.
-	 *
-	 * @param type The event type.
-	 * @param message The message body.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record MessageStartEvent(// @formatter:off
-		@JsonProperty("type") EventType type,
-		@JsonProperty("message") ChatCompletionResponse message) implements StreamEvent {
-	}
-	// @formatter:on
-
-	/**
-	 * Message delta event.
-	 *
-	 * @param type The event type.
-	 * @param delta The message delta body.
-	 * @param usage The message delta usage.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record MessageDeltaEvent(
-	// @formatter:off
-		@JsonProperty("type") EventType type,
-		@JsonProperty("delta") MessageDelta delta,
-		@JsonProperty("usage") @Nullable MessageDeltaUsage usage) implements StreamEvent {
-
-		/**
-		  * @param stopReason The stop reason.
-		  * @param stopSequence The stop sequence.
-		  */
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record MessageDelta(
-			@JsonProperty("stop_reason") String stopReason,
-			@JsonProperty("stop_sequence") String stopSequence) {
-		}
-
-		/**
-		 * Message delta usage.
-		 * @param outputTokens The output tokens.
-		*/
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record MessageDeltaUsage(
-			@JsonProperty("output_tokens") Integer outputTokens) {
-		}
-	}
-	// @formatter:on
-
-	/**
-	 * Message stop event.
-	 *
-	 * @param type The event type.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record MessageStopEvent(
-	//@formatter:off
-		@JsonProperty("type") EventType type) implements StreamEvent {
-	}
-	// @formatter:on
-
-	///////////////////////////////////////
-	/// ERROR EVENT
-	///////////////////////////////////////
-	/**
-	 * Error event.
-	 *
-	 * @param type The event type.
-	 * @param error The error body.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record ErrorEvent(
-	// @formatter:off
-		@JsonProperty("type") EventType type,
-		@JsonProperty("error") Error error) implements StreamEvent {
-
-		/**
-		 * Error body.
-		 * @param type The error type.
-		 * @param message The error message.
-		*/
-		@JsonInclude(Include.NON_NULL)
-		@JsonIgnoreProperties(ignoreUnknown = true)
-		public record Error(
-			@JsonProperty("type") String type,
-			@JsonProperty("message") String message) {
-		}
-	}
-	// @formatter:on
-
-	///////////////////////////////////////
-	/// PING EVENT
-	///////////////////////////////////////
-	/**
-	 * Ping event.
-	 *
-	 * @param type The event type.
-	 */
-	@JsonInclude(Include.NON_NULL)
-	@JsonIgnoreProperties(ignoreUnknown = true)
-	public record PingEvent(
-	// @formatter:off
-		@JsonProperty("type") EventType type) implements StreamEvent {
-	}
-	// @formatter:on
-
-	public static final class Builder {
-
-		private String baseUrl = DEFAULT_BASE_URL;
-
-		private String completionsPath = DEFAULT_MESSAGE_COMPLETIONS_PATH;
-
-		private @Nullable ApiKey apiKey;
-
-		private String anthropicVersion = DEFAULT_ANTHROPIC_VERSION;
-
-		private RestClient.Builder restClientBuilder = RestClient.builder();
-
-		private WebClient.Builder webClientBuilder = WebClient.builder();
-
-		private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER;
-
-		private String anthropicBetaFeatures = DEFAULT_ANTHROPIC_BETA_VERSION;
-
-		public Builder baseUrl(String baseUrl) {
-			Assert.hasText(baseUrl, "baseUrl cannot be null or empty");
-			this.baseUrl = baseUrl;
-			return this;
-		}
-
-		public Builder completionsPath(String completionsPath) {
-			Assert.hasText(completionsPath, "completionsPath cannot be null or empty");
-			this.completionsPath = completionsPath;
-			return this;
-		}
-
-		public Builder apiKey(ApiKey apiKey) {
-			Assert.notNull(apiKey, "apiKey cannot be null");
-			this.apiKey = apiKey;
-			return this;
-		}
-
-		public Builder apiKey(String simpleApiKey) {
-			Assert.notNull(simpleApiKey, "simpleApiKey cannot be null");
-			this.apiKey = new SimpleApiKey(simpleApiKey);
-			return this;
-		}
-
-		public Builder anthropicVersion(String anthropicVersion) {
-			Assert.notNull(anthropicVersion, "anthropicVersion cannot be null");
-			this.anthropicVersion = anthropicVersion;
-			return this;
-		}
-
-		public Builder restClientBuilder(RestClient.Builder restClientBuilder) {
-			Assert.notNull(restClientBuilder, "restClientBuilder cannot be null");
-			this.restClientBuilder = restClientBuilder;
-			return this;
-		}
-
-		public Builder webClientBuilder(WebClient.Builder webClientBuilder) {
-			Assert.notNull(webClientBuilder, "webClientBuilder cannot be null");
-			this.webClientBuilder = webClientBuilder;
-			return this;
-		}
-
-		public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) {
-			Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null");
-			this.responseErrorHandler = responseErrorHandler;
-			return this;
-		}
-
-		public Builder anthropicBetaFeatures(String anthropicBetaFeatures) {
-			Assert.notNull(anthropicBetaFeatures, "anthropicBetaFeatures cannot be null");
-			this.anthropicBetaFeatures = anthropicBetaFeatures;
-			return this;
-		}
-
-		public AnthropicApi build() {
-			Assert.notNull(this.apiKey, "apiKey must be set");
-			return new AnthropicApi(this.baseUrl, this.completionsPath, this.apiKey, this.anthropicVersion,
-					this.restClientBuilder, this.webClientBuilder, this.responseErrorHandler,
-					this.anthropicBetaFeatures);
-		}
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
deleted file mode 100644
index 37477a093b3..00000000000
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-/**
- * Defines the caching strategy for Anthropic prompt caching. Anthropic allows up to 4
- * cache breakpoints per request, and the cache hierarchy follows the order: tools →
- * system → messages.
- *
- * @author Mark Pollack
- * @author Soby Chacko
- * @since 1.1.0
- */
-public enum AnthropicCacheStrategy {
-
-	/**
-	 * No caching (default behavior). All content is processed fresh on each request.
-	 * <p>
-	 * Use this when:
-	 * <ul>
-	 * <li>Requests are one-off or highly variable</li>
-	 * <li>Content doesn't meet minimum token requirements (1024+ tokens)</li>
-	 * <li>You want to avoid caching overhead</li>
-	 * </ul>
-	 */
-	NONE,
-
-	/**
-	 * Cache tool definitions only. Places a cache breakpoint on the last tool, while
-	 * system messages and conversation history remain uncached and are processed fresh on
-	 * each request.
-	 * <p>
-	 * Use this when:
-	 * <ul>
-	 * <li>Tool definitions are large and stable (5000+ tokens)</li>
-	 * <li>System prompts change frequently or are small (&lt;500 tokens)</li>
-	 * <li>You want to share cached tools across different system contexts (e.g.,
-	 * multi-tenant applications, A/B testing system prompts)</li>
-	 * <li>Tool definitions rarely change</li>
-	 * </ul>
-	 * <p>
-	 * <strong>Important:</strong> Changing any tool definition will invalidate this cache
-	 * entry. Due to Anthropic's cascade invalidation, tool changes will also invalidate
-	 * any downstream cache breakpoints (system, messages) if used in combination with
-	 * other strategies.
-	 */
-	TOOLS_ONLY,
-
-	/**
-	 * Cache system instructions only. Places a cache breakpoint on the system message
-	 * content. Tools are cached implicitly via Anthropic's automatic ~20-block lookback
-	 * mechanism (content before the cache breakpoint is included in the cache).
-	 * <p>
-	 * Use this when:
-	 * <ul>
-	 * <li>System prompts are large and stable (1024+ tokens)</li>
-	 * <li>Tool definitions are relatively small (&lt;20 tools)</li>
-	 * <li>You want simple, single-breakpoint caching</li>
-	 * </ul>
-	 * <p>
-	 * <strong>Note:</strong> Changing tools will invalidate the cache since tools are
-	 * part of the cache prefix (they appear before system in the request hierarchy).
-	 */
-	SYSTEM_ONLY,
-
-	/**
-	 * Cache system instructions and tool definitions. Places cache breakpoints on the
-	 * last tool (breakpoint 1) and system message content (breakpoint 2).
-	 * <p>
-	 * Use this when:
-	 * <ul>
-	 * <li>Both tools and system prompts are large and stable</li>
-	 * <li>You have many tools (20+ tools, beyond the automatic lookback window)</li>
-	 * <li>You want deterministic, explicit caching of both components</li>
-	 * <li>System prompts may change independently of tools</li>
-	 * </ul>
-	 * <p>
-	 * <strong>Behavior:</strong>
-	 * <ul>
-	 * <li>If only tools change: Both caches invalidated (tools + system)</li>
-	 * <li>If only system changes: Tools cache remains valid, system cache
-	 * invalidated</li>
-	 * </ul>
-	 * This allows efficient reuse of tool cache when only system prompts are updated.
-	 */
-	SYSTEM_AND_TOOLS,
-
-	/**
-	 * Cache the entire conversation history up to (but not including) the current user
-	 * question. Places a cache breakpoint on the last user message in the conversation
-	 * history, enabling incremental caching as the conversation grows.
-	 * <p>
-	 * Use this when:
-	 * <ul>
-	 * <li>Building multi-turn conversational applications (chatbots, assistants)</li>
-	 * <li>Conversation history is large and grows over time</li>
-	 * <li>You want to reuse conversation context while asking new questions</li>
-	 * <li>Using chat memory advisors or conversation persistence</li>
-	 * </ul>
-	 * <p>
-	 * <strong>Behavior:</strong> Each turn builds on the previous cached prefix. The
-	 * cache grows incrementally: Request 1 caches [Message1], Request 2 caches [Message1
-	 * + Message2], etc. This provides significant cost savings (90%+) and performance
-	 * improvements for long conversations.
-	 * <p>
-	 * <strong>Important:</strong> Changing tools or system prompts will invalidate the
-	 * entire conversation cache due to cascade invalidation. Tool and system stability is
-	 * critical for this strategy.
-	 */
-	CONVERSATION_HISTORY
-
-}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java
deleted file mode 100644
index e297ed5f0c4..00000000000
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.util.function.Function;
-
-import org.jspecify.annotations.Nullable;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
-
-/**
- * Cache types supported by Anthropic's prompt caching feature.
- *
- * <p>
- * Prompt caching allows reusing frequently used prompts to reduce costs and improve
- * response times for repeated interactions.
- *
- * @see <a href=
- * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching">Anthropic Prompt
- * Caching</a>
- * @author Claudio Silva Junior
- * @author Soby Chacko
- * @author Austin Dase
- */
-public enum AnthropicCacheType {
-
-	/**
-	 * Ephemeral cache with 5-minute lifetime, refreshed on each use.
-	 */
-	EPHEMERAL(ttl -> new CacheControl("ephemeral", ttl));
-
-	private final Function<@Nullable String, CacheControl> value;
-
-	AnthropicCacheType(Function<@Nullable String, CacheControl> value) {
-		this.value = value;
-	}
-
-	/**
-	 * Returns a new CacheControl instance for this cache type.
-	 * @return a CacheControl instance configured for this cache type
-	 */
-	public CacheControl cacheControl() {
-		return this.value.apply(null);
-	}
-
-	/**
-	 * Returns a new CacheControl instance for this cache type with the specified TTL.
-	 * @param ttl the time-to-live for the cache entry (e.g., "5m" for 5 minutes, "1h" for
-	 * 1 hour)
-	 * @return a CacheControl instance configured for this cache type and TTL
-	 */
-	public CacheControl cacheControl(String ttl) {
-		return this.value.apply(ttl);
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java
deleted file mode 100644
index faab346ec73..00000000000
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Objects;
-import java.util.concurrent.atomic.AtomicReference;
-
-import org.jspecify.annotations.Nullable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock.Type;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaJson;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaSignature;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaText;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaThinking;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockText;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockThinking;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockToolUse;
-import org.springframework.ai.anthropic.api.AnthropicApi.EventType;
-import org.springframework.ai.anthropic.api.AnthropicApi.MessageDeltaEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.MessageStartEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.Role;
-import org.springframework.ai.anthropic.api.AnthropicApi.StreamEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.ToolUseAggregationEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.Usage;
-import org.springframework.util.Assert;
-import org.springframework.util.CollectionUtils;
-import org.springframework.util.StringUtils;
-
-/**
- * Helper class to support streaming function calling and thinking events.
- * <p>
- * It can merge the streamed {@link StreamEvent} chunks in case of function calling
- * message. It passes through other events like text, thinking, and signature deltas.
- *
- * @author Mariusz Bernacki
- * @author Christian Tzolov
- * @author Jihoon Kim
- * @author Alexandros Pappas
- * @author Claudio Silva Junior
- * @author Soby Chacko
- * @author Sun Yuhan
- * @since 1.0.0
- */
-public class StreamHelper {
-
-	private static final Logger logger = LoggerFactory.getLogger(StreamHelper.class);
-
-	public boolean isToolUseStart(@Nullable StreamEvent event) {
-		if (event == null || event.type() != EventType.CONTENT_BLOCK_START) {
-			return false;
-		}
-		return ContentBlock.Type.TOOL_USE.getValue().equals(((ContentBlockStartEvent) event).contentBlock().type());
-	}
-
-	public boolean isToolUseFinish(@Nullable StreamEvent event) {
-		// Tool use streaming sequence ends with a CONTENT_BLOCK_STOP event.
-		// The logic relies on the state machine (isInsideTool flag) managed in
-		// chatCompletionStream to know if this stop event corresponds to a tool use.
-		return event != null && event.type() == EventType.CONTENT_BLOCK_STOP;
-	}
-
-	/**
-	 * Merge the tool‑use related streaming events into one aggregate event so that the
-	 * upper layers see a single ContentBlock with the full JSON input.
-	 */
-	public StreamEvent mergeToolUseEvents(StreamEvent previousEvent, StreamEvent event) {
-
-		if (!(previousEvent instanceof ToolUseAggregationEvent eventAggregator)) {
-			return event;
-		}
-
-		if (event.type() == EventType.CONTENT_BLOCK_START) {
-			ContentBlockStartEvent contentBlockStart = (ContentBlockStartEvent) event;
-
-			if (ContentBlock.Type.TOOL_USE.getValue().equals(contentBlockStart.contentBlock().type())) {
-				ContentBlockToolUse cbToolUse = (ContentBlockToolUse) contentBlockStart.contentBlock();
-
-				return eventAggregator.withIndex(contentBlockStart.index())
-					.withId(cbToolUse.id())
-					.withName(cbToolUse.name())
-					.appendPartialJson(""); // CB START always has empty JSON.
-			}
-		}
-		else if (event.type() == EventType.CONTENT_BLOCK_DELTA) {
-			ContentBlockDeltaEvent contentBlockDelta = (ContentBlockDeltaEvent) event;
-			if (ContentBlock.Type.INPUT_JSON_DELTA.getValue().equals(contentBlockDelta.delta().type())) {
-				return eventAggregator
-					.appendPartialJson(((ContentBlockDeltaJson) contentBlockDelta.delta()).partialJson());
-			}
-		}
-		else if (event.type() == EventType.CONTENT_BLOCK_STOP) {
-			if (!eventAggregator.isEmpty()) {
-				eventAggregator.squashIntoContentBlock();
-				return eventAggregator;
-			}
-		}
-
-		return event;
-	}
-
-	/**
-	 * Converts a raw {@link StreamEvent} potentially containing tool use aggregates or
-	 * other block types (text, thinking) into a {@link ChatCompletionResponse} chunk.
-	 * @param event The incoming StreamEvent.
-	 * @param contentBlockReference Holds the state of the response being built across
-	 * multiple events.
-	 * @return A ChatCompletionResponse representing the processed chunk.
-	 */
-	// TODO How to deal with the use cases where id is null since it is now mandatory in
-	// ChatCompletionResponse?
-	public ChatCompletionResponse eventToChatCompletionResponse(StreamEvent event,
-			AtomicReference<ChatCompletionResponseBuilder> contentBlockReference) {
-
-		// https://docs.anthropic.com/claude/reference/messages-streaming
-
-		if (EventType.MESSAGE_START.equals(event.type())) {
-			contentBlockReference.set(new ChatCompletionResponseBuilder());
-
-			MessageStartEvent messageStartEvent = (MessageStartEvent) event;
-
-			contentBlockReference.get()
-				.withType(Objects.requireNonNull(event.type()).name())
-				.withId(messageStartEvent.message().id())
-				.withRole(messageStartEvent.message().role())
-				.withModel(messageStartEvent.message().model())
-				.withUsage(messageStartEvent.message().usage())
-				.withContent(new ArrayList<>());
-		}
-		else if (EventType.TOOL_USE_AGGREGATE.equals(event.type())) {
-			ToolUseAggregationEvent eventToolUseBuilder = (ToolUseAggregationEvent) event;
-
-			if (!CollectionUtils.isEmpty(eventToolUseBuilder.getToolContentBlocks())) {
-
-				List<ContentBlock> content = eventToolUseBuilder.getToolContentBlocks()
-					.stream()
-					.map(tooToUse -> new ContentBlock(Type.TOOL_USE, tooToUse.id(), tooToUse.name(), tooToUse.input()))
-					.toList();
-				contentBlockReference.get().withContent(content);
-			}
-		}
-		else if (EventType.CONTENT_BLOCK_START.equals(event.type())) {
-			ContentBlockStartEvent contentBlockStartEvent = (ContentBlockStartEvent) event;
-
-			if (contentBlockStartEvent.contentBlock() instanceof ContentBlockText textBlock) {
-				ContentBlock cb = new ContentBlock(Type.TEXT, null, textBlock.text(), contentBlockStartEvent.index());
-				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
-			}
-			else if (contentBlockStartEvent.contentBlock() instanceof ContentBlockThinking thinkingBlock) {
-				ContentBlock cb = new ContentBlock(Type.THINKING, null, null, contentBlockStartEvent.index(), null,
-						null, null, null, null, thinkingBlock.signature(), thinkingBlock.thinking(), null, null, null,
-						null, null, null, null);
-				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
-			}
-			else {
-				throw new IllegalArgumentException(
-						"Unsupported content block type: " + contentBlockStartEvent.contentBlock().type());
-			}
-		}
-		else if (EventType.CONTENT_BLOCK_DELTA.equals(event.type())) {
-			ContentBlockDeltaEvent contentBlockDeltaEvent = (ContentBlockDeltaEvent) event;
-
-			if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaText txt) {
-				ContentBlock cb = new ContentBlock(Type.TEXT_DELTA, null, txt.text(), contentBlockDeltaEvent.index());
-				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
-			}
-			else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaThinking thinking) {
-				ContentBlock cb = new ContentBlock(Type.THINKING_DELTA, null, null, contentBlockDeltaEvent.index(),
-						null, null, null, null, null, null, thinking.thinking(), null, null, null, null, null, null,
-						null);
-				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
-			}
-			else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaSignature sig) {
-				ContentBlock cb = new ContentBlock(Type.SIGNATURE_DELTA, null, null, contentBlockDeltaEvent.index(),
-						null, null, null, null, null, sig.signature(), null, null, null, null, null, null, null, null);
-				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
-			}
-			else {
-				throw new IllegalArgumentException(
-						"Unsupported content block delta type: " + contentBlockDeltaEvent.delta().type());
-			}
-		}
-		else if (EventType.MESSAGE_DELTA.equals(event.type())) {
-
-			contentBlockReference.get().withType(event.type().name());
-
-			MessageDeltaEvent messageDeltaEvent = (MessageDeltaEvent) event;
-
-			if (StringUtils.hasText(messageDeltaEvent.delta().stopReason())) {
-				contentBlockReference.get().withStopReason(messageDeltaEvent.delta().stopReason());
-			}
-
-			if (StringUtils.hasText(messageDeltaEvent.delta().stopSequence())) {
-				contentBlockReference.get().withStopSequence(messageDeltaEvent.delta().stopSequence());
-			}
-
-			if (messageDeltaEvent.usage() != null) {
-				Usage usage = Objects.requireNonNull(contentBlockReference.get().usage);
-				Usage totalUsage = new Usage(usage.inputTokens(), messageDeltaEvent.usage().outputTokens(),
-						usage.cacheCreationInputTokens(), usage.cacheReadInputTokens());
-				contentBlockReference.get().withUsage(totalUsage);
-			}
-		}
-		else if (EventType.MESSAGE_STOP.equals(event.type())) {
-			// Don't return the latest Content block as it was before. Instead, return it
-			// with an updated event type and general information like: model, message
-			// type, id and usage
-			contentBlockReference.get()
-				.withType(event.type().name())
-				.withContent(List.of())
-				.withStopReason(null)
-				.withStopSequence(null);
-		}
-		else {
-			// Any other event types that should propagate upwards without content
-			// noinspection ConstantValue
-			if (contentBlockReference.get() == null) {
-				contentBlockReference.set(new ChatCompletionResponseBuilder());
-			}
-			contentBlockReference.get().withType(event.type().name()).withContent(List.of());
-			logger.warn("Unhandled event type: {}", event.type().name());
-		}
-
-		return Objects.requireNonNull(contentBlockReference.get()).build();
-	}
-
-	/**
-	 * Builder for {@link ChatCompletionResponse}. Used internally by {@link StreamHelper}
-	 * to aggregate stream events.
-	 */
-	public static class ChatCompletionResponseBuilder {
-
-		private @Nullable String type;
-
-		private @Nullable String id;
-
-		private @Nullable Role role;
-
-		private @Nullable List<ContentBlock> content;
-
-		private @Nullable String model;
-
-		private @Nullable String stopReason;
-
-		private @Nullable String stopSequence;
-
-		private @Nullable Usage usage;
-
-		public ChatCompletionResponseBuilder() {
-		}
-
-		public ChatCompletionResponseBuilder withType(@Nullable String type) {
-			this.type = type;
-			return this;
-		}
-
-		public ChatCompletionResponseBuilder withId(@Nullable String id) {
-			this.id = id;
-			return this;
-		}
-
-		public ChatCompletionResponseBuilder withRole(@Nullable Role role) {
-			this.role = role;
-			return this;
-		}
-
-		public ChatCompletionResponseBuilder withContent(@Nullable List<ContentBlock> content) {
-			this.content = content;
-			return this;
-		}
-
-		public ChatCompletionResponseBuilder withModel(@Nullable String model) {
-			this.model = model;
-			return this;
-		}
-
-		public ChatCompletionResponseBuilder withStopReason(@Nullable String stopReason) {
-			this.stopReason = stopReason;
-			return this;
-		}
-
-		public ChatCompletionResponseBuilder withStopSequence(@Nullable String stopSequence) {
-			this.stopSequence = stopSequence;
-			return this;
-		}
-
-		public ChatCompletionResponseBuilder withUsage(@Nullable Usage usage) {
-			this.usage = usage;
-			return this;
-		}
-
-		public ChatCompletionResponse build() {
-			Assert.state(this.id != null, "The id must not be null");
-			Assert.state(this.content != null, "The content must not be null");
-			Assert.state(this.model != null, "The model must not be null");
-			return new ChatCompletionResponse(this.id, this.type, this.role, this.content, this.model, this.stopReason,
-					this.stopSequence, this.usage, null);
-		}
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/package-info.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/package-info.java
deleted file mode 100644
index 46add76c92f..00000000000
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/package-info.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-@NullMarked
-package org.springframework.ai.anthropic.api.utils;
-
-import org.jspecify.annotations.NullMarked;
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/metadata/AnthropicRateLimit.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/metadata/AnthropicRateLimit.java
deleted file mode 100644
index 02b1cf66dd6..00000000000
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/metadata/AnthropicRateLimit.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.metadata;
-
-import java.time.Duration;
-
-import org.springframework.ai.chat.metadata.RateLimit;
-
-/**
- * {@link RateLimit} implementation for {@literal OpenAI}.
- *
- * @author Christian Tzolov
- * @since 1.0.0
- */
-public class AnthropicRateLimit implements RateLimit {
-
-	private static final String RATE_LIMIT_STRING = "{ @type: %1$s, requestsLimit: %2$s, requestsRemaining: %3$s, requestsReset: %4$s, tokensLimit: %5$s; tokensRemaining: %6$s; tokensReset: %7$s }";
-
-	private final Long requestsLimit;
-
-	private final Long requestsRemaining;
-
-	private final Long tokensLimit;
-
-	private final Long tokensRemaining;
-
-	private final Duration requestsReset;
-
-	private final Duration tokensReset;
-
-	public AnthropicRateLimit(Long requestsLimit, Long requestsRemaining, Duration requestsReset, Long tokensLimit,
-			Long tokensRemaining, Duration tokensReset) {
-
-		this.requestsLimit = requestsLimit;
-		this.requestsRemaining = requestsRemaining;
-		this.requestsReset = requestsReset;
-		this.tokensLimit = tokensLimit;
-		this.tokensRemaining = tokensRemaining;
-		this.tokensReset = tokensReset;
-	}
-
-	@Override
-	public Long getRequestsLimit() {
-		return this.requestsLimit;
-	}
-
-	@Override
-	public Long getTokensLimit() {
-		return this.tokensLimit;
-	}
-
-	@Override
-	public Long getRequestsRemaining() {
-		return this.requestsRemaining;
-	}
-
-	@Override
-	public Long getTokensRemaining() {
-		return this.tokensRemaining;
-	}
-
-	@Override
-	public Duration getRequestsReset() {
-		return this.requestsReset;
-	}
-
-	@Override
-	public Duration getTokensReset() {
-		return this.tokensReset;
-	}
-
-	@Override
-	public String toString() {
-		return RATE_LIMIT_STRING.formatted(getClass().getName(), getRequestsLimit(), getRequestsRemaining(),
-				getRequestsReset(), getTokensLimit(), getTokensRemaining(), getTokensReset());
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/package-info.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/package-info.java
index bf3a26edba0..780e93db8c8 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/package-info.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/package-info.java
@@ -14,6 +14,40 @@
  * limitations under the License.
  */
 
+/**
+ * Spring AI integration with Anthropic's Claude models using the official
+ * <a href="https://github.com/anthropics/anthropic-sdk-java">Anthropic Java SDK</a>.
+ *
+ * <p>
+ * This package provides a {@link org.springframework.ai.chat.model.ChatModel}
+ * implementation that enables interaction with Claude models through Anthropic's Messages
+ * API. The integration supports both synchronous and streaming conversations,
+ * tool/function calling, and Micrometer observation and tracing on both paths.
+ *
+ * <p>
+ * <b>Key Classes:</b>
+ * <ul>
+ * <li>{@link org.springframework.ai.anthropic.AnthropicChatModel} - Main chat model
+ * implementation</li>
+ * <li>{@link org.springframework.ai.anthropic.AnthropicChatOptions} - Configuration
+ * options for chat requests</li>
+ * </ul>
+ *
+ * <p>
+ * <b>Quick Start:</b> <pre>{@code
+ * AnthropicChatModel chatModel = new AnthropicChatModel(
+ *     AnthropicChatOptions.builder()
+ *         .model("claude-sonnet-4-20250514")
+ *         .maxTokens(1024)
+ *         .build());
+ *
+ * ChatResponse response = chatModel.call(new Prompt("Hello, Claude!"));
+ * }</pre>
+ *
+ * @since 2.0.0
+ * @see org.springframework.ai.anthropic.AnthropicChatModel
+ * @see org.springframework.ai.anthropic.AnthropicChatOptions
+ */
 @NullMarked
 package org.springframework.ai.anthropic;
 
diff --git a/models/spring-ai-anthropic/src/main/resources/META-INF/spring/aot.factories b/models/spring-ai-anthropic/src/main/resources/META-INF/spring/aot.factories
deleted file mode 100644
index 0052c5ad474..00000000000
--- a/models/spring-ai-anthropic/src/main/resources/META-INF/spring/aot.factories
+++ /dev/null
@@ -1,2 +0,0 @@
-org.springframework.aot.hint.RuntimeHintsRegistrar=\
-	org.springframework.ai.anthropic.aot.AnthropicRuntimeHints
\ No newline at end of file
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicCacheOptionsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicCacheOptionsTests.java
similarity index 59%
rename from models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicCacheOptionsTests.java
rename to models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicCacheOptionsTests.java
index 855bcc0c326..5e8afc3459f 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicCacheOptionsTests.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicCacheOptionsTests.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic.api;
-
-import java.util.function.Function;
+package org.springframework.ai.anthropic;
 
 import org.junit.jupiter.api.Test;
 
@@ -27,38 +25,51 @@
 /**
  * Tests for {@link AnthropicCacheOptions}.
  *
- * @author Austin Dase
+ * @author Soby Chacko
  */
 class AnthropicCacheOptionsTests {
 
 	@Test
 	void defaultsAreSane() {
-		AnthropicCacheOptions options = new AnthropicCacheOptions();
+		AnthropicCacheOptions options = AnthropicCacheOptions.builder().build();
+
 		assertThat(options.getStrategy()).isEqualTo(AnthropicCacheStrategy.NONE);
-		// All message types default to FIVE_MINUTES and min content length 1
-		for (MessageType mt : MessageType.values()) {
-			assertThat(options.getMessageTypeTtl().get(mt)).isEqualTo(AnthropicCacheTtl.FIVE_MINUTES);
-			assertThat(options.getMessageTypeMinContentLengths().get(mt)).isEqualTo(1);
-		}
-		// Default content length function returns string length (null -> 0)
-		assertThat(options.getContentLengthFunction().apply("abc")).isEqualTo(3);
+		assertThat(options.getMessageTypeTtl().get(MessageType.SYSTEM)).isEqualTo(AnthropicCacheTtl.FIVE_MINUTES);
+		assertThat(options.getMessageTypeMinContentLengths().get(MessageType.SYSTEM)).isEqualTo(1);
+		assertThat(options.getContentLengthFunction().apply("hello")).isEqualTo(5);
 		assertThat(options.getContentLengthFunction().apply(null)).isEqualTo(0);
 	}
 
 	@Test
 	void builderOverrides() {
-		Function<String, Integer> clf = s -> 123;
 		AnthropicCacheOptions options = AnthropicCacheOptions.builder()
 			.strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
 			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
 			.messageTypeMinContentLength(MessageType.SYSTEM, 100)
-			.contentLengthFunction(clf)
+			.contentLengthFunction(s -> s != null ? s.length() * 2 : 0)
 			.build();
 
 		assertThat(options.getStrategy()).isEqualTo(AnthropicCacheStrategy.SYSTEM_AND_TOOLS);
 		assertThat(options.getMessageTypeTtl().get(MessageType.SYSTEM)).isEqualTo(AnthropicCacheTtl.ONE_HOUR);
 		assertThat(options.getMessageTypeMinContentLengths().get(MessageType.SYSTEM)).isEqualTo(100);
-		assertThat(options.getContentLengthFunction()).isSameAs(clf);
+		assertThat(options.getContentLengthFunction().apply("test")).isEqualTo(8);
+	}
+
+	@Test
+	void multiBlockSystemCachingDefaultsToFalse() {
+		AnthropicCacheOptions options = AnthropicCacheOptions.builder().build();
+		assertThat(options.isMultiBlockSystemCaching()).isFalse();
+	}
+
+	@Test
+	void multiBlockSystemCachingBuilderOverride() {
+		AnthropicCacheOptions options = AnthropicCacheOptions.builder().multiBlockSystemCaching(true).build();
+		assertThat(options.isMultiBlockSystemCaching()).isTrue();
+	}
+
+	@Test
+	void disabledSingletonHasNoneStrategy() {
+		assertThat(AnthropicCacheOptions.disabled().getStrategy()).isEqualTo(AnthropicCacheStrategy.NONE);
 	}
 
 }
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelAdditionalHttpHeadersIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelAdditionalHttpHeadersIT.java
deleted file mode 100644
index 6cd65400a9c..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelAdditionalHttpHeadersIT.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic;
-
-import java.util.Map;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.chat.prompt.Prompt;
-import org.springframework.ai.retry.NonTransientAiException;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.SpringBootConfiguration;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.context.annotation.Bean;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
-
-/**
- * @author Christian Tzolov
- */
-@SpringBootTest(classes = AnthropicChatModelAdditionalHttpHeadersIT.Config.class)
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-public class AnthropicChatModelAdditionalHttpHeadersIT {
-
-	@Autowired
-	private AnthropicChatModel chatModel;
-
-	@Test
-	void additionalApiKeyHeader() {
-
-		assertThatThrownBy(() -> this.chatModel.call("Tell me a joke")).isInstanceOf(NonTransientAiException.class);
-
-		// Use the additional headers to override the Api Key.
-		// Mind that you have to prefix the Api Key with the "Bearer " prefix.
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.httpHeaders(Map.of("x-api-key", System.getenv("ANTHROPIC_API_KEY")))
-			.build();
-
-		ChatResponse response = this.chatModel.call(new Prompt("Tell me a joke", options));
-
-		assertThat(response).isNotNull();
-	}
-
-	@SpringBootConfiguration
-	static class Config {
-
-		@Bean
-		public AnthropicApi anthropicApi() {
-			return AnthropicApi.builder().apiKey("Invalid API Key").build();
-		}
-
-		@Bean
-		public AnthropicChatModel anthropicChatModel(AnthropicApi api) {
-			return AnthropicChatModel.builder().anthropicApi(api).build();
-		}
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelSkillsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelSkillsTests.java
deleted file mode 100644
index 801a9abbdad..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelSkillsTests.java
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic;
-
-import io.micrometer.observation.ObservationRegistry;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.ExtendWith;
-import org.mockito.Mock;
-import org.mockito.junit.jupiter.MockitoExtension;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicSkill;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest;
-import org.springframework.ai.chat.prompt.Prompt;
-import org.springframework.ai.model.tool.ToolCallingManager;
-import org.springframework.ai.retry.RetryUtils;
-import org.springframework.core.retry.RetryTemplate;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * Unit tests for {@link AnthropicChatModel} with Skills support.
- *
- * @author Soby Chacko
- * @since 2.0.0
- */
-@ExtendWith(MockitoExtension.class)
-class AnthropicChatModelSkillsTests {
-
-	@Mock
-	private AnthropicApi anthropicApi;
-
-	private AnthropicChatModel createChatModel(AnthropicChatOptions defaultOptions) {
-		RetryTemplate retryTemplate = RetryUtils.SHORT_RETRY_TEMPLATE;
-		ObservationRegistry observationRegistry = ObservationRegistry.NOOP;
-		ToolCallingManager toolCallingManager = ToolCallingManager.builder().build();
-		return new AnthropicChatModel(this.anthropicApi, defaultOptions, toolCallingManager, retryTemplate,
-				observationRegistry);
-	}
-
-	@Test
-	void shouldIncludeSkillsFromRequestOptions() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder().skill(AnthropicSkill.XLSX).build();
-
-		Prompt prompt = new Prompt("Create a spreadsheet", requestOptions);
-
-		ChatCompletionRequest request = chatModel.createRequest(prompt, false);
-
-		assertThat(request.container()).isNotNull();
-		assertThat(request.container().skills()).hasSize(1);
-		assertThat(request.container().skills().get(0).skillId()).isEqualTo("xlsx");
-	}
-
-	@Test
-	void shouldIncludeSkillsFromDefaultOptions() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.skill(AnthropicSkill.PPTX)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		// Pass empty options to avoid null check failures
-		Prompt prompt = new Prompt("Create a presentation", AnthropicChatOptions.builder().build());
-
-		ChatCompletionRequest request = chatModel.createRequest(prompt, false);
-
-		assertThat(request.container()).isNotNull();
-		assertThat(request.container().skills()).hasSize(1);
-		assertThat(request.container().skills().get(0).skillId()).isEqualTo("pptx");
-	}
-
-	@Test
-	void shouldPrioritizeRequestOptionsOverDefaultOptions() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.skill(AnthropicSkill.PPTX)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder().skill(AnthropicSkill.XLSX).build();
-
-		Prompt prompt = new Prompt("Create a spreadsheet", requestOptions);
-
-		ChatCompletionRequest request = chatModel.createRequest(prompt, false);
-
-		assertThat(request.container()).isNotNull();
-		assertThat(request.container().skills()).hasSize(1);
-		assertThat(request.container().skills().get(0).skillId()).isEqualTo("xlsx");
-	}
-
-	@Test
-	void shouldIncludeMultipleSkills() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder()
-			.skill(AnthropicSkill.XLSX)
-			.skill(AnthropicSkill.PPTX)
-			.skill("my-custom-skill")
-			.build();
-
-		Prompt prompt = new Prompt("Create documents", requestOptions);
-
-		ChatCompletionRequest request = chatModel.createRequest(prompt, false);
-
-		assertThat(request.container()).isNotNull();
-		assertThat(request.container().skills()).hasSize(3);
-		assertThat(request.container().skills().get(0).skillId()).isEqualTo("xlsx");
-		assertThat(request.container().skills().get(1).skillId()).isEqualTo("pptx");
-		assertThat(request.container().skills().get(2).skillId()).isEqualTo("my-custom-skill");
-	}
-
-	@Test
-	void shouldHandleNullSkillsGracefully() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		// Pass empty options to avoid null check failures
-		Prompt prompt = new Prompt("Simple question", AnthropicChatOptions.builder().build());
-
-		ChatCompletionRequest request = chatModel.createRequest(prompt, false);
-
-		assertThat(request.container()).isNull();
-	}
-
-	@Test
-	void shouldIncludeSkillsWithVersion() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder()
-			.skill(AnthropicSkill.XLSX, "20251013")
-			.build();
-
-		Prompt prompt = new Prompt("Create a spreadsheet", requestOptions);
-
-		ChatCompletionRequest request = chatModel.createRequest(prompt, false);
-
-		assertThat(request.container()).isNotNull();
-		assertThat(request.container().skills()).hasSize(1);
-		assertThat(request.container().skills().get(0).skillId()).isEqualTo("xlsx");
-		assertThat(request.container().skills().get(0).version()).isEqualTo("20251013");
-	}
-
-	@Test
-	void shouldAddSkillsBetaHeaderWhenSkillsPresent() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder().skill(AnthropicSkill.XLSX).build();
-
-		Prompt prompt = new Prompt("Create a spreadsheet", requestOptions);
-
-		chatModel.createRequest(prompt, false);
-
-		assertThat(requestOptions.getHttpHeaders()).isNotNull();
-		assertThat(requestOptions.getHttpHeaders()).containsKey("anthropic-beta");
-		String betaHeader = requestOptions.getHttpHeaders().get("anthropic-beta");
-		assertThat(betaHeader).contains(AnthropicApi.BETA_SKILLS);
-		assertThat(betaHeader).contains(AnthropicApi.BETA_CODE_EXECUTION);
-		assertThat(betaHeader).contains(AnthropicApi.BETA_FILES_API);
-	}
-
-	@Test
-	void shouldNotAddSkillsBetaHeaderWhenNoSkills() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder().build();
-
-		Prompt prompt = new Prompt("Simple question", requestOptions);
-
-		chatModel.createRequest(prompt, false);
-
-		assertThat(requestOptions.getHttpHeaders().get("anthropic-beta")).isNull();
-	}
-
-	@Test
-	void shouldAppendSkillsBetaHeaderToExistingBetaHeaders() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		java.util.Map<String, String> existingHeaders = new java.util.HashMap<>();
-		existingHeaders.put("anthropic-beta", "some-other-beta");
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder()
-			.skill(AnthropicSkill.XLSX)
-			.httpHeaders(existingHeaders)
-			.build();
-
-		Prompt prompt = new Prompt("Create a spreadsheet", requestOptions);
-
-		chatModel.createRequest(prompt, false);
-
-		String betaHeader = requestOptions.getHttpHeaders().get("anthropic-beta");
-		assertThat(betaHeader).contains("some-other-beta")
-			.contains(AnthropicApi.BETA_SKILLS)
-			.contains(AnthropicApi.BETA_CODE_EXECUTION)
-			.contains(AnthropicApi.BETA_FILES_API);
-	}
-
-	@Test
-	void shouldAutomaticallyAddCodeExecutionToolWhenSkillsPresent() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder().skill(AnthropicSkill.XLSX).build();
-
-		Prompt prompt = new Prompt("Create a spreadsheet", requestOptions);
-
-		ChatCompletionRequest request = chatModel.createRequest(prompt, false);
-
-		// Verify code_execution tool is automatically added
-		assertThat(request.tools()).isNotNull();
-		assertThat(request.tools()).hasSize(1);
-		assertThat(request.tools().get(0).name()).isEqualTo("code_execution");
-	}
-
-	@Test
-	void shouldNotDuplicateCodeExecutionToolIfAlreadyPresent() {
-		AnthropicChatOptions defaultOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(1024)
-			.build();
-
-		AnthropicChatModel chatModel = createChatModel(defaultOptions);
-
-		AnthropicChatOptions requestOptions = AnthropicChatOptions.builder().skill(AnthropicSkill.XLSX).build();
-
-		Prompt prompt = new Prompt("Create a spreadsheet", requestOptions);
-
-		// Note: We can't easily test this without exposing more of the internal state,
-		// but the implementation checks for existing code_execution tool
-		ChatCompletionRequest request = chatModel.createRequest(prompt, false);
-
-		// Should have exactly 1 tool (code_execution), not duplicated
-		assertThat(request.tools()).isNotNull();
-		assertThat(request.tools()).hasSize(1);
-		assertThat(request.tools().get(0).name()).isEqualTo("code_execution");
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelTests.java
new file mode 100644
index 00000000000..43b2c920c7f
--- /dev/null
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelTests.java
@@ -0,0 +1,411 @@
+/*
+ * Copyright 2023-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import com.anthropic.client.AnthropicClient;
+import com.anthropic.client.AnthropicClientAsync;
+import com.anthropic.core.JsonValue;
+import com.anthropic.models.messages.ContentBlock;
+import com.anthropic.models.messages.Message;
+import com.anthropic.models.messages.MessageCreateParams;
+import com.anthropic.models.messages.Model;
+import com.anthropic.models.messages.OutputConfig;
+import com.anthropic.models.messages.StopReason;
+import com.anthropic.models.messages.TextBlock;
+import com.anthropic.models.messages.ToolUseBlock;
+import com.anthropic.models.messages.Usage;
+import com.anthropic.services.blocking.MessageService;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.mockito.junit.jupiter.MockitoSettings;
+import org.mockito.quality.Strictness;
+
+import org.springframework.ai.chat.messages.AssistantMessage;
+import org.springframework.ai.chat.messages.SystemMessage;
+import org.springframework.ai.chat.messages.UserMessage;
+import org.springframework.ai.chat.model.ChatResponse;
+import org.springframework.ai.chat.prompt.Prompt;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.BDDMockito.given;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Unit tests for {@link AnthropicChatModel}. Tests request building and response parsing
+ * with mocked SDK client.
+ *
+ * @author Soby Chacko
+ */
+@ExtendWith(MockitoExtension.class)
+@MockitoSettings(strictness = Strictness.LENIENT)
+class AnthropicChatModelTests {
+
+	@Mock
+	private AnthropicClient anthropicClient;
+
+	@Mock
+	private AnthropicClientAsync anthropicClientAsync;
+
+	@Mock
+	private MessageService messageService;
+
+	private AnthropicChatModel chatModel;
+
+	@BeforeEach
+	void setUp() {
+		// Route the blocking client's messages() accessor to the mocked MessageService.
+		given(this.anthropicClient.messages()).willReturn(this.messageService);
+		AnthropicChatOptions defaults = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514)
+			.maxTokens(1024).temperature(0.7)
+			.build();
+		this.chatModel = AnthropicChatModel.builder()
+			.anthropicClient(this.anthropicClient)
+			.anthropicClientAsync(this.anthropicClientAsync)
+			.options(defaults)
+			.build();
+	}
+
+	@Test
+	void callWithSimpleUserMessage() {
+		String replyText = "Hello! How can I help you today?";
+		Message stubbedReply = createMockMessage(replyText, StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		ChatResponse chatResponse = this.chatModel.call(new Prompt("Hello"));
+		assertThat(chatResponse).isNotNull();
+		assertThat(chatResponse.getResult()).isNotNull();
+		assertThat(chatResponse.getResult().getOutput().getText()).isEqualTo(replyText);
+
+		// The request handed to the SDK must carry the defaults configured in setUp().
+		ArgumentCaptor<MessageCreateParams> sentParams = ArgumentCaptor.forClass(MessageCreateParams.class);
+		verify(this.messageService).create(sentParams.capture());
+		MessageCreateParams sent = sentParams.getValue();
+		assertThat(sent.model().asString()).isEqualTo("claude-sonnet-4-20250514");
+		assertThat(sent.maxTokens()).isEqualTo(1024);
+	}
+
+	@Test
+	void callWithSystemAndUserMessages() {
+		String replyText = "I am a helpful assistant.";
+		Message stubbedReply = createMockMessage(replyText, StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		Prompt prompt = new Prompt(
+				List.of(new SystemMessage("You are a helpful assistant."), new UserMessage("Who are you?")));
+		ChatResponse chatResponse = this.chatModel.call(prompt);
+
+		assertThat(chatResponse.getResult().getOutput().getText()).isEqualTo(replyText);
+
+		// The system message must surface in the request's system field.
+		ArgumentCaptor<MessageCreateParams> sentParams = ArgumentCaptor.forClass(MessageCreateParams.class);
+		verify(this.messageService).create(sentParams.capture());
+		MessageCreateParams sent = sentParams.getValue();
+		assertThat(sent.system()).isPresent();
+	}
+
+	@Test
+	void callWithRuntimeOptionsOverride() {
+		Message stubbedReply = createMockMessage("Response with override", StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		// Per-request options that differ from every default configured in setUp().
+		AnthropicChatOptions overrides = AnthropicChatOptions.builder()
+			.model("claude-3-opus-20240229")
+			.maxTokens(2048)
+			.temperature(0.3)
+			.build();
+		Prompt prompt = new Prompt("Test", overrides);
+
+		assertThat(this.chatModel.call(prompt)).isNotNull();
+
+		ArgumentCaptor<MessageCreateParams> sentParams = ArgumentCaptor.forClass(MessageCreateParams.class);
+		verify(this.messageService).create(sentParams.capture());
+		// The runtime values must win over the defaults.
+		MessageCreateParams sent = sentParams.getValue();
+		assertThat(sent.model().asString()).isEqualTo("claude-3-opus-20240229");
+		assertThat(sent.maxTokens()).isEqualTo(2048);
+	}
+
+	@Test
+	void responseContainsUsageMetadata() {
+		// createMockMessage stubs usage with 10 input tokens and 20 output tokens.
+		Message stubbedReply = createMockMessage("Test response", StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		var metadata = this.chatModel.call(new Prompt("Test")).getMetadata();
+		assertThat(metadata).isNotNull();
+		assertThat(metadata.getUsage()).isNotNull();
+		assertThat(metadata.getUsage().getPromptTokens()).isEqualTo(10);
+		assertThat(metadata.getUsage().getCompletionTokens()).isEqualTo(20);
+		assertThat(metadata.getUsage().getTotalTokens()).isEqualTo(30);
+	}
+
+	@Test
+	void responseContainsFinishReason() {
+		Message stubbedReply = createMockMessage("Stopped at max tokens", StopReason.MAX_TOKENS);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		// StopReason.MAX_TOKENS is expected to surface as the string "max_tokens".
+		var finishReason = this.chatModel.call(new Prompt("Test")).getResult().getMetadata().getFinishReason();
+		assertThat(finishReason).isEqualTo("max_tokens");
+	}
+
+	@Test
+	void responseWithToolUseBlock() {
+		JsonValue toolInput = JsonValue.from(Map.of("location", "San Francisco"));
+		Message stubbedReply = createMockMessageWithToolUse("toolu_123", "getCurrentWeather", toolInput,
+				StopReason.TOOL_USE);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		// Internal tool execution is disabled so that only tool-call parsing is exercised.
+		AnthropicChatOptions options = AnthropicChatOptions.builder().internalToolExecutionEnabled(false).build();
+		ChatResponse chatResponse = this.chatModel.call(new Prompt("What's the weather?", options));
+
+		assertThat(chatResponse.getResult()).isNotNull();
+		AssistantMessage assistantOutput = chatResponse.getResult().getOutput();
+		assertThat(assistantOutput.getToolCalls()).isNotEmpty();
+		assertThat(assistantOutput.getToolCalls()).hasSize(1);
+
+		var parsedCall = assistantOutput.getToolCalls().get(0);
+		assertThat(parsedCall.id()).isEqualTo("toolu_123");
+		assertThat(parsedCall.name()).isEqualTo("getCurrentWeather");
+		assertThat(parsedCall.arguments()).contains("San Francisco");
+	}
+
+	@Test
+	void getDefaultOptionsReturnsCopy() {
+		// Two successive reads must yield distinct instances that carry the same model.
+		var firstRead = this.chatModel.getDefaultOptions();
+		var secondRead = this.chatModel.getDefaultOptions();
+		assertThat(firstRead).isNotSameAs(secondRead);
+		assertThat(firstRead.getModel()).isEqualTo(secondRead.getModel());
+	}
+
+	@Test
+	void buildRequestPromptMergesOptions() {
+		// Defaults set model, maxTokens and temperature; the runtime options set temperature only.
+		AnthropicChatModel customModel = AnthropicChatModel.builder()
+			.anthropicClient(this.anthropicClient)
+			.anthropicClientAsync(this.anthropicClientAsync)
+			.options(AnthropicChatOptions.builder().model("default-model").maxTokens(1000).temperature(0.5).build())
+			.build();
+		Prompt runtimePrompt = new Prompt("Test", AnthropicChatOptions.builder().temperature(0.9).build());
+
+		Prompt requestPrompt = customModel.buildRequestPrompt(runtimePrompt);
+		AnthropicChatOptions merged = (AnthropicChatOptions) requestPrompt.getOptions();
+
+		// Unset runtime fields fall back to the defaults; the runtime temperature wins.
+		assertThat(merged.getModel()).isEqualTo("default-model");
+		assertThat(merged.getMaxTokens()).isEqualTo(1000);
+		assertThat(merged.getTemperature()).isEqualTo(0.9);
+	}
+
+	@Test
+	void cacheOptionsIsMergedFromRuntimePrompt() {
+		AnthropicChatModel customModel = AnthropicChatModel.builder()
+			.anthropicClient(this.anthropicClient)
+			.anthropicClientAsync(this.anthropicClientAsync)
+			.options(AnthropicChatOptions.builder().model("default-model").maxTokens(1000).build())
+			.build();
+
+		// Cache options are supplied only on the prompt, not in the model defaults.
+		AnthropicChatOptions runtimeOptions = AnthropicChatOptions.builder()
+			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
+			.build();
+		Prompt originalPrompt = new Prompt("Test", runtimeOptions);
+
+		Prompt requestPrompt = customModel.buildRequestPrompt(originalPrompt);
+		AnthropicChatOptions merged = (AnthropicChatOptions) requestPrompt.getOptions();
+		var mergedCache = merged.getCacheOptions();
+
+		assertThat(mergedCache).isNotNull();
+		assertThat(mergedCache.getStrategy()).isEqualTo(AnthropicCacheStrategy.SYSTEM_ONLY);
+	}
+
+	@Test
+	void multiTurnConversation() {
+		String replyText = "Paris is the capital of France.";
+		Message stubbedReply = createMockMessage(replyText, StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		// A user / assistant / user history; all three turns must reach the request.
+		Prompt conversation = new Prompt(List.of(new UserMessage("What is the capital of France?"),
+				new AssistantMessage("The capital of France is Paris."),
+				new UserMessage("What is its population?")));
+		ChatResponse chatResponse = this.chatModel.call(conversation);
+
+		assertThat(chatResponse.getResult().getOutput().getText()).isEqualTo(replyText);
+
+		ArgumentCaptor<MessageCreateParams> sentParams = ArgumentCaptor.forClass(MessageCreateParams.class);
+		verify(this.messageService).create(sentParams.capture());
+		MessageCreateParams sent = sentParams.getValue();
+		assertThat(sent.messages()).hasSize(3);
+	}
+
+	@Test
+	void callWithOutputConfig() {
+		Message stubbedReply = createMockMessage("{ \"name\": \"test\" }", StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		// Only the effort level is configured on the output config.
+		AnthropicChatOptions options = AnthropicChatOptions.builder()
+			.outputConfig(OutputConfig.builder().effort(OutputConfig.Effort.HIGH).build())
+			.build();
+
+		assertThat(this.chatModel.call(new Prompt("Generate JSON", options))).isNotNull();
+
+		ArgumentCaptor<MessageCreateParams> sentParams = ArgumentCaptor.forClass(MessageCreateParams.class);
+		verify(this.messageService).create(sentParams.capture());
+
+		// The effort level must arrive unchanged in the outgoing request.
+		var sentConfig = sentParams.getValue().outputConfig();
+		assertThat(sentConfig).isPresent();
+		assertThat(sentConfig.get().effort()).isPresent();
+		assertThat(sentConfig.get().effort().get()).isEqualTo(OutputConfig.Effort.HIGH);
+	}
+
+	@Test
+	void callWithOutputSchema() {
+		Message stubbedReply = createMockMessage("{ \"name\": \"France\" }", StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		String schema = "{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"}}}";
+		AnthropicChatOptions options = AnthropicChatOptions.builder().outputSchema(schema).build();
+
+		ChatResponse chatResponse = this.chatModel.call(new Prompt("Generate JSON", options));
+
+		assertThat(chatResponse).isNotNull();
+
+		ArgumentCaptor<MessageCreateParams> sentParams = ArgumentCaptor.forClass(MessageCreateParams.class);
+		verify(this.messageService).create(sentParams.capture());
+
+		// A JSON schema on the options must show up as an output format on the request.
+		var sentConfig = sentParams.getValue().outputConfig();
+		assertThat(sentConfig).isPresent();
+		assertThat(sentConfig.get().format()).isPresent();
+	}
+
+	@Test
+	void callWithHttpHeaders() {
+		Message stubbedReply = createMockMessage("Hello", StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		AnthropicChatOptions options = AnthropicChatOptions.builder()
+			.httpHeaders(Map.of("X-Custom-Header", "custom-value", "X-Request-Id", "req-123"))
+			.build();
+
+		ChatResponse chatResponse = this.chatModel.call(new Prompt("Hello", options));
+		assertThat(chatResponse).isNotNull();
+
+		ArgumentCaptor<MessageCreateParams> sentParams = ArgumentCaptor.forClass(MessageCreateParams.class);
+		verify(this.messageService).create(sentParams.capture());
+
+		// Each custom header must be present among the request's additional headers.
+		var sentHeaders = sentParams.getValue()._additionalHeaders();
+		assertThat(sentHeaders.values("X-Custom-Header")).contains("custom-value");
+		assertThat(sentHeaders.values("X-Request-Id")).contains("req-123");
+	}
+
+	@Test
+	void callWithSkillContainerWiresAdditionalBodyAndBetaHeaders() {
+		Message stubbedReply = createMockMessage("Created spreadsheet", StopReason.END_TURN);
+		given(this.messageService.create(any(MessageCreateParams.class))).willReturn(stubbedReply);
+
+		AnthropicChatOptions skillOptions = AnthropicChatOptions.builder()
+			.skill(AnthropicSkill.XLSX)
+			.internalToolExecutionEnabled(false)
+			.build();
+
+		ChatResponse chatResponse = this.chatModel.call(new Prompt("Create an Excel file", skillOptions));
+
+		assertThat(chatResponse).isNotNull();
+
+		ArgumentCaptor<MessageCreateParams> sentParams = ArgumentCaptor.forClass(MessageCreateParams.class);
+		verify(this.messageService).create(sentParams.capture());
+		MessageCreateParams sent = sentParams.getValue();
+		// Requesting a skill must put all three beta flags into the anthropic-beta header.
+		var betaValues = sent._additionalHeaders().values("anthropic-beta");
+		assertThat(betaValues).isNotEmpty();
+		String joinedBetas = String.join(",", betaValues);
+		assertThat(joinedBetas).contains("skills-2025-10-02");
+		assertThat(joinedBetas).contains("code-execution-2025-08-25");
+		assertThat(joinedBetas).contains("files-api-2025-04-14");
+		// The skill container must be sent as the additional "container" body property.
+		assertThat(sent._additionalBodyProperties()).containsKey("container");
+	}
+
+	private Message createMockMessage(String text, StopReason stopReason) {
+		// Fixed token counts (10 in, 20 out) that the usage assertions rely on.
+		Usage tokenUsage = mock(Usage.class);
+		given(tokenUsage.inputTokens()).willReturn(10L);
+		given(tokenUsage.outputTokens()).willReturn(20L);
+
+		TextBlock textPart = mock(TextBlock.class);
+		given(textPart.text()).willReturn(text);
+		// A content block that reports itself as text only and unwraps to textPart.
+		ContentBlock block = mock(ContentBlock.class);
+		given(block.isText()).willReturn(true);
+		given(block.isToolUse()).willReturn(false);
+		given(block.asText()).willReturn(textPart);
+
+		Message sdkMessage = mock(Message.class);
+		given(sdkMessage.id()).willReturn("msg_123");
+		given(sdkMessage.model()).willReturn(Model.CLAUDE_SONNET_4_20250514);
+		given(sdkMessage.content()).willReturn(List.of(block));
+		given(sdkMessage.stopReason()).willReturn(Optional.of(stopReason));
+		given(sdkMessage.usage()).willReturn(tokenUsage);
+		return sdkMessage;
+	}
+
+	private Message createMockMessageWithToolUse(String toolId, String toolName, JsonValue input,
+			StopReason stopReason) {
+		// Token counts stubbed for the tool-use reply: 15 in, 25 out.
+		Usage tokenUsage = mock(Usage.class);
+		given(tokenUsage.inputTokens()).willReturn(15L);
+		given(tokenUsage.outputTokens()).willReturn(25L);
+
+		ToolUseBlock toolUse = mock(ToolUseBlock.class);
+		given(toolUse.id()).willReturn(toolId);
+		given(toolUse.name()).willReturn(toolName);
+		given(toolUse._input()).willReturn(input);
+		// A content block that reports itself as tool_use only and unwraps to toolUse.
+		ContentBlock block = mock(ContentBlock.class);
+		given(block.isText()).willReturn(false);
+		given(block.isToolUse()).willReturn(true);
+		given(block.asToolUse()).willReturn(toolUse);
+
+		Message sdkMessage = mock(Message.class);
+		given(sdkMessage.id()).willReturn("msg_456");
+		given(sdkMessage.model()).willReturn(Model.CLAUDE_SONNET_4_20250514);
+		given(sdkMessage.content()).willReturn(List.of(block));
+		given(sdkMessage.stopReason()).willReturn(Optional.of(stopReason));
+		given(sdkMessage.usage()).willReturn(tokenUsage);
+		return sdkMessage;
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsSkillsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsSkillsTests.java
deleted file mode 100644
index a9d440dbb85..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsSkillsTests.java
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic;
-
-import org.junit.jupiter.api.Test;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicSkill;
-import org.springframework.ai.anthropic.api.AnthropicApi.Skill;
-import org.springframework.ai.anthropic.api.AnthropicApi.SkillContainer;
-import org.springframework.ai.anthropic.api.AnthropicApi.SkillType;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * Unit tests for {@link AnthropicChatOptions} with Skills support.
- *
- * @author Soby Chacko
- * @since 2.0.0
- */
-class AnthropicChatOptionsSkillsTests {
-
-	@Test
-	void shouldBuildOptionsWithSingleSkill() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().skill(AnthropicSkill.XLSX).build();
-
-		assertThat(options.getSkillContainer()).isNotNull();
-		assertThat(options.getSkillContainer().skills()).hasSize(1);
-		assertThat(options.getSkillContainer().skills().get(0).skillId()).isEqualTo("xlsx");
-		assertThat(options.getSkillContainer().skills().get(0).type()).isEqualTo(SkillType.ANTHROPIC);
-	}
-
-	@Test
-	void shouldBuildOptionsWithMultipleSkills() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.skill(AnthropicSkill.XLSX)
-			.skill(AnthropicSkill.PPTX)
-			.skill("my-custom-skill")
-			.build();
-
-		assertThat(options.getSkillContainer()).isNotNull();
-		assertThat(options.getSkillContainer().skills()).hasSize(3);
-		assertThat(options.getSkillContainer().skills().get(0).skillId()).isEqualTo("xlsx");
-		assertThat(options.getSkillContainer().skills().get(1).skillId()).isEqualTo("pptx");
-		assertThat(options.getSkillContainer().skills().get(2).skillId()).isEqualTo("my-custom-skill");
-		assertThat(options.getSkillContainer().skills().get(2).type()).isEqualTo(SkillType.CUSTOM);
-	}
-
-	@Test
-	void shouldBuildOptionsWithSkillContainer() {
-		SkillContainer container = SkillContainer.builder().skill(AnthropicSkill.DOCX).build();
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder().skillContainer(container).build();
-
-		assertThat(options.getSkillContainer()).isSameAs(container);
-		assertThat(options.getSkillContainer().skills()).hasSize(1);
-	}
-
-	@Test
-	void shouldBuildOptionsWithSkillVersion() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().skill(AnthropicSkill.XLSX, "20251013").build();
-
-		assertThat(options.getSkillContainer()).isNotNull();
-		assertThat(options.getSkillContainer().skills()).hasSize(1);
-		assertThat(options.getSkillContainer().skills().get(0).version()).isEqualTo("20251013");
-	}
-
-	@Test
-	void shouldBuildOptionsWithCustomSkillVersion() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().skill("my-skill", "1.0.0").build();
-
-		assertThat(options.getSkillContainer()).isNotNull();
-		assertThat(options.getSkillContainer().skills()).hasSize(1);
-		assertThat(options.getSkillContainer().skills().get(0).skillId()).isEqualTo("my-skill");
-		assertThat(options.getSkillContainer().skills().get(0).version()).isEqualTo("1.0.0");
-		assertThat(options.getSkillContainer().skills().get(0).type()).isEqualTo(SkillType.CUSTOM);
-	}
-
-	@Test
-	void shouldCopyOptionsWithSkills() {
-		SkillContainer container = SkillContainer.builder().skill(AnthropicSkill.PDF).build();
-
-		AnthropicChatOptions original = AnthropicChatOptions.builder()
-			.model("claude-sonnet-4-5")
-			.maxTokens(2048)
-			.skillContainer(container)
-			.build();
-
-		AnthropicChatOptions copy = AnthropicChatOptions.fromOptions(original);
-
-		assertThat(copy.getSkillContainer()).isNotNull();
-		assertThat(copy.getSkillContainer()).isSameAs(original.getSkillContainer());
-		assertThat(copy.getSkillContainer().skills()).hasSize(1);
-		assertThat(copy.getModel()).isEqualTo(original.getModel());
-		assertThat(copy.getMaxTokens()).isEqualTo(original.getMaxTokens());
-	}
-
-	@Test
-	void shouldIncludeSkillsInEqualsAndHashCode() {
-		SkillContainer container = SkillContainer.builder().skill(AnthropicSkill.XLSX).build();
-
-		AnthropicChatOptions options1 = AnthropicChatOptions.builder().skillContainer(container).build();
-
-		AnthropicChatOptions options2 = AnthropicChatOptions.builder().skillContainer(container).build();
-
-		AnthropicChatOptions options3 = AnthropicChatOptions.builder().skill(AnthropicSkill.PPTX).build();
-
-		assertThat(options1).isEqualTo(options2);
-		assertThat(options1.hashCode()).isEqualTo(options2.hashCode());
-		assertThat(options1).isNotEqualTo(options3);
-	}
-
-	@Test
-	void shouldBuildOptionsWithSkillMethod() {
-		Skill skill = new Skill(SkillType.ANTHROPIC, "docx", "latest");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder().skill(skill).build();
-
-		assertThat(options.getSkillContainer()).isNotNull();
-		assertThat(options.getSkillContainer().skills()).hasSize(1);
-		assertThat(options.getSkillContainer().skills().get(0)).isSameAs(skill);
-	}
-
-	@Test
-	void shouldAllowNullSkillContainer() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().model("claude-sonnet-4-5").build();
-
-		assertThat(options.getSkillContainer()).isNull();
-	}
-
-	@Test
-	void shouldAddMultipleSkillsSequentially() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.skill(AnthropicSkill.XLSX)
-			.skill(AnthropicSkill.PPTX)
-			.skill(AnthropicSkill.DOCX)
-			.build();
-
-		assertThat(options.getSkillContainer()).isNotNull();
-		assertThat(options.getSkillContainer().skills()).hasSize(3);
-	}
-
-	@Test
-	void shouldPreserveExistingSkillsWhenAddingNew() {
-		SkillContainer initialContainer = SkillContainer.builder().skill(AnthropicSkill.XLSX).build();
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.skillContainer(initialContainer)
-			.skill(AnthropicSkill.PPTX)
-			.build();
-
-		assertThat(options.getSkillContainer()).isNotNull();
-		assertThat(options.getSkillContainer().skills()).hasSize(2);
-		assertThat(options.getSkillContainer().skills().get(0).skillId()).isEqualTo("xlsx");
-		assertThat(options.getSkillContainer().skills().get(1).skillId()).isEqualTo("pptx");
-	}
-
-	@Test
-	void shouldSetSkillContainerViaGetter() {
-		AnthropicChatOptions options = new AnthropicChatOptions();
-		SkillContainer container = SkillContainer.builder().skill(AnthropicSkill.PDF).build();
-
-		options.setSkillContainer(container);
-
-		assertThat(options.getSkillContainer()).isSameAs(container);
-	}
-
-	@Test
-	void shouldCopyOptionsWithNullSkills() {
-		AnthropicChatOptions original = AnthropicChatOptions.builder().model("claude-sonnet-4-5").build();
-
-		AnthropicChatOptions copy = AnthropicChatOptions.fromOptions(original);
-
-		assertThat(copy.getSkillContainer()).isNull();
-	}
-
-	@Test
-	void shouldMaintainSkillOrderWhenAdding() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.skill(AnthropicSkill.XLSX)
-			.skill("skill-a")
-			.skill(AnthropicSkill.PPTX)
-			.skill("skill-b")
-			.build();
-
-		assertThat(options.getSkillContainer().skills()).hasSize(4);
-		assertThat(options.getSkillContainer().skills().get(0).skillId()).isEqualTo("xlsx");
-		assertThat(options.getSkillContainer().skills().get(1).skillId()).isEqualTo("skill-a");
-		assertThat(options.getSkillContainer().skills().get(2).skillId()).isEqualTo("pptx");
-		assertThat(options.getSkillContainer().skills().get(3).skillId()).isEqualTo("skill-b");
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java
index 8a6ec74efb9..b1c7a21d30a 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java
@@ -16,37 +16,51 @@
 
 package org.springframework.ai.anthropic;
 
-import java.util.Collections;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
-import net.javacrumbs.jsonunit.assertj.JsonAssertions;
-import org.assertj.core.api.Assertions;
+import java.util.Set;
+
+import com.anthropic.core.JsonValue;
+import com.anthropic.models.messages.JsonOutputFormat;
+import com.anthropic.models.messages.Metadata;
+import com.anthropic.models.messages.Model;
+import com.anthropic.models.messages.OutputConfig;
+import com.anthropic.models.messages.ToolChoice;
+import com.anthropic.models.messages.ToolChoiceAuto;
 import org.junit.jupiter.api.Test;
 
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.Metadata;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatModel;
-import org.springframework.ai.anthropic.api.AnthropicCacheOptions;
-import org.springframework.ai.anthropic.api.AnthropicCacheStrategy;
-import org.springframework.ai.anthropic.api.AnthropicCacheTtl;
-import org.springframework.ai.chat.messages.MessageType;
+import org.springframework.ai.model.tool.StructuredOutputChatOptions;
 import org.springframework.ai.test.options.AbstractChatOptionsTests;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 /**
- * Tests for {@link AnthropicChatOptions}.
+ * Unit tests for {@link AnthropicChatOptions}. Focuses on critical behaviors: builder,
+ * copy, mutate, combineWith, equals/hashCode, and validation.
  *
- * @author Alexandros Pappas
  * @author Soby Chacko
- * @author Austin Dase
- * @author Filip Hrisafov
  */
 class AnthropicChatOptionsTests<B extends AnthropicChatOptions.Builder<B>>
 		extends AbstractChatOptionsTests<AnthropicChatOptions, B> {
 
+	@Override
+	protected Class<AnthropicChatOptions> getConcreteOptionsClass() {
+		return AnthropicChatOptions.class; // the options type this test class is parameterized with
+	}
+
+	@Override
+	@SuppressWarnings("unchecked") // covers the (B) cast of the concrete builder below
+	protected B readyToBuildBuilder() {
+		return (B) AnthropicChatOptions.builder().model(Model.CLAUDE_HAIKU_4_5).maxTokens(500);
+	}
+
 	@Test
 	void testBuilderWithAllFields() {
+		Metadata metadata = Metadata.builder().userId("userId_123").build();
 		AnthropicChatOptions options = AnthropicChatOptions.builder()
 			.model("test-model")
 			.maxTokens(100)
@@ -54,125 +68,134 @@ void testBuilderWithAllFields() {
 			.temperature(0.7)
 			.topP(0.8)
 			.topK(50)
-			.metadata(new Metadata("userId_123"))
+			.metadata(metadata)
+			.baseUrl("https://custom.api.com")
+			.timeout(Duration.ofSeconds(120))
+			.maxRetries(5)
+			.toolChoice(ToolChoice.ofAuto(ToolChoiceAuto.builder().build()))
+			.disableParallelToolUse(true)
+			.toolNames("tool1", "tool2")
+			.toolContext(Map.of("key", "value"))
+			.internalToolExecutionEnabled(true)
 			.build();
 
-		assertThat(options).extracting("model", "maxTokens", "stopSequences", "temperature", "topP", "topK", "metadata")
-			.containsExactly("test-model", 100, List.of("stop1", "stop2"), 0.7, 0.8, 50, new Metadata("userId_123"));
+		assertThat(options.getModel()).isEqualTo("test-model");
+		assertThat(options.getMaxTokens()).isEqualTo(100);
+		assertThat(options.getStopSequences()).containsExactly("stop1", "stop2");
+		assertThat(options.getTemperature()).isEqualTo(0.7);
+		assertThat(options.getTopP()).isEqualTo(0.8);
+		assertThat(options.getTopK()).isEqualTo(50);
+		assertThat(options.getMetadata()).isEqualTo(metadata);
+		assertThat(options.getBaseUrl()).isEqualTo("https://custom.api.com");
+		assertThat(options.getTimeout()).isEqualTo(Duration.ofSeconds(120));
+		assertThat(options.getMaxRetries()).isEqualTo(5);
+		assertThat(options.getToolChoice()).isNotNull();
+		assertThat(options.getDisableParallelToolUse()).isTrue();
+		assertThat(options.getToolNames()).containsExactlyInAnyOrder("tool1", "tool2");
+		assertThat(options.getToolContext()).containsEntry("key", "value");
+		assertThat(options.getInternalToolExecutionEnabled()).isTrue();
 	}
 
 	@Test
-	void testCopy() {
+	void testBuilderWithModelEnum() {
+		AnthropicChatOptions options = AnthropicChatOptions.builder().model(Model.CLAUDE_SONNET_4_20250514).build();
+
+		assertThat(options.getModel()).isEqualTo("claude-sonnet-4-20250514");
+	}
+
+	@Test
+	void testCopyCreatesIndependentInstance() {
+		Metadata metadata = Metadata.builder().userId("userId_123").build();
+		List<String> mutableStops = new ArrayList<>(List.of("stop1", "stop2"));
+		Map<String, Object> mutableContext = new HashMap<>(Map.of("key1", "value1"));
+
 		AnthropicChatOptions original = AnthropicChatOptions.builder()
 			.model("test-model")
 			.maxTokens(100)
-			.stopSequences(List.of("stop1", "stop2"))
+			.stopSequences(mutableStops)
 			.temperature(0.7)
 			.topP(0.8)
 			.topK(50)
-			.metadata(new Metadata("userId_123"))
-			.toolContext(Map.of("key1", "value1"))
+			.metadata(metadata)
+			.toolContext(mutableContext)
+			.disableParallelToolUse(true)
 			.build();
 
 		AnthropicChatOptions copied = original.copy();
 
-		assertThat(copied).isNotSameAs(original).isEqualTo(original);
-		// Ensure deep copy
+		// Verify copied is equal but not same instance
+		assertThat(copied).isNotSameAs(original);
+		assertThat(copied).isEqualTo(original);
+
+		// Verify collections are deep copied
 		assertThat(copied.getStopSequences()).isNotSameAs(original.getStopSequences());
 		assertThat(copied.getToolContext()).isNotSameAs(original.getToolContext());
-	}
-
-	@Test
-	void testSetters() {
-		AnthropicChatOptions options = new AnthropicChatOptions();
-		options.setModel("test-model");
-		options.setMaxTokens(100);
-		options.setTemperature(0.7);
-		options.setTopK(50);
-		options.setTopP(0.8);
-		options.setStopSequences(List.of("stop1", "stop2"));
-		options.setMetadata(new Metadata("userId_123"));
 
-		assertThat(options.getModel()).isEqualTo("test-model");
-		assertThat(options.getMaxTokens()).isEqualTo(100);
-		assertThat(options.getTemperature()).isEqualTo(0.7);
-		assertThat(options.getTopK()).isEqualTo(50);
-		assertThat(options.getTopP()).isEqualTo(0.8);
-		assertThat(options.getStopSequences()).isEqualTo(List.of("stop1", "stop2"));
-		assertThat(options.getMetadata()).isEqualTo(new Metadata("userId_123"));
-	}
+		// Modify copy and verify original is unchanged
+		copied.setModel("modified-model");
+		copied.setMaxTokens(200);
+		assertThat(original.getModel()).isEqualTo("test-model");
+		assertThat(original.getMaxTokens()).isEqualTo(100);
 
-	@Test
-	void testDefaultValues() {
-		AnthropicChatOptions options = new AnthropicChatOptions();
-		assertThat(options.getModel()).isNull();
-		assertThat(options.getMaxTokens()).isNull();
-		assertThat(options.getTemperature()).isNull();
-		assertThat(options.getTopK()).isNull();
-		assertThat(options.getTopP()).isNull();
-		assertThat(options.getStopSequences()).isNull();
-		assertThat(options.getMetadata()).isNull();
-		assertThat(options.getOutputSchema()).isNull();
-		assertThat(options.getOutputFormat()).isNull();
+		// Mutate the source collections passed to the builder and verify copy is unchanged
+		mutableStops.add("stop3");
+		mutableContext.put("key2", "value2");
+		assertThat(copied.getStopSequences()).hasSize(2);
+		assertThat(copied.getToolContext()).hasSize(1);
 	}
 
 	@Test
-	void testBuilderWithEmptyCollections() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.stopSequences(Collections.emptyList())
-			.toolContext(Collections.emptyMap())
+	void testCombineWithOverridesOnlyNonNullValues() {
+		AnthropicChatOptions base = AnthropicChatOptions.builder()
+			.model("base-model")
+			.maxTokens(100)
+			.temperature(0.5)
+			.topP(0.8)
+			.baseUrl("https://base.api.com")
+			.timeout(Duration.ofSeconds(60))
 			.build();
 
-		assertThat(options.getStopSequences()).isEmpty();
-		assertThat(options.getToolContext()).isEmpty();
-	}
-
-	@Test
-	void testBuilderWithSingleElementCollections() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.stopSequences(List.of("single-stop"))
-			.toolContext(Map.of("single-key", "single-value"))
+		AnthropicChatOptions override = AnthropicChatOptions.builder()
+			.model("override-model")
+			.topK(40)
+			// maxTokens, temperature, topP, baseUrl, timeout are null
 			.build();
 
-		assertThat(options.getStopSequences()).hasSize(1).containsExactly("single-stop");
-		assertThat(options.getToolContext()).hasSize(1).containsEntry("single-key", "single-value");
-	}
+		AnthropicChatOptions merged = base.mutate().combineWith(override.mutate()).build();
 
-	@Test
-	void testCopyWithEmptyOptions() {
-		AnthropicChatOptions emptyOptions = new AnthropicChatOptions();
-		AnthropicChatOptions copiedOptions = emptyOptions.copy();
+		// Override values take precedence
+		assertThat(merged.getModel()).isEqualTo("override-model");
+		assertThat(merged.getTopK()).isEqualTo(40);
 
-		assertThat(copiedOptions).isNotSameAs(emptyOptions).isEqualTo(emptyOptions);
-		assertThat(copiedOptions.getModel()).isNull();
-		assertThat(copiedOptions.getMaxTokens()).isNull();
-		assertThat(copiedOptions.getTemperature()).isNull();
+		// Base values preserved when override is null
+		assertThat(merged.getMaxTokens()).isEqualTo(100);
+		assertThat(merged.getTemperature()).isEqualTo(0.5);
+		assertThat(merged.getTopP()).isEqualTo(0.8);
+		assertThat(merged.getBaseUrl()).isEqualTo("https://base.api.com");
+		assertThat(merged.getTimeout()).isEqualTo(Duration.ofSeconds(60));
 	}
 
 	@Test
-	void testCopyMutationDoesNotAffectOriginal() {
-		AnthropicChatOptions original = AnthropicChatOptions.builder()
-			.model("original-model")
-			.maxTokens(100)
-			.temperature(0.5)
-			.stopSequences(List.of("original-stop"))
-			.toolContext(Map.of("original", "value"))
+	void testCombineWithCollections() {
+		AnthropicChatOptions base = AnthropicChatOptions.builder()
+			.stopSequences(List.of("base-stop"))
+			.toolNames(Set.of("base-tool"))
+			.toolContext(Map.of("base-key", "base-value"))
 			.build();
 
-		AnthropicChatOptions copy = original.copy();
-		copy.setModel("modified-model");
-		copy.setMaxTokens(200);
-		copy.setTemperature(0.8);
+		AnthropicChatOptions override = AnthropicChatOptions.builder()
+			.stopSequences(List.of("override-stop1", "override-stop2"))
+			.toolNames(Set.of("override-tool"))
+			// toolContext is not set here, so it should not override the base value
+			.build();
 
-		// Original should remain unchanged
-		assertThat(original.getModel()).isEqualTo("original-model");
-		assertThat(original.getMaxTokens()).isEqualTo(100);
-		assertThat(original.getTemperature()).isEqualTo(0.5);
+		AnthropicChatOptions merged = base.mutate().combineWith(override.mutate()).build();
 
-		// Copy should have new values
-		assertThat(copy.getModel()).isEqualTo("modified-model");
-		assertThat(copy.getMaxTokens()).isEqualTo(200);
-		assertThat(copy.getTemperature()).isEqualTo(0.8);
+		// Non-empty collections from override take precedence
+		assertThat(merged.getStopSequences()).containsExactly("override-stop1", "override-stop2");
+		assertThat(merged.getToolNames()).containsExactly("override-tool");
+		// Empty collections don't override
+		assertThat(merged.getToolContext()).containsEntry("base-key", "base-value");
 	}
 
 	@Test
@@ -195,459 +218,328 @@ void testEqualsAndHashCode() {
 			.temperature(0.7)
 			.build();
 
+		// Equal objects
 		assertThat(options1).isEqualTo(options2);
 		assertThat(options1.hashCode()).isEqualTo(options2.hashCode());
 
+		// Different objects
 		assertThat(options1).isNotEqualTo(options3);
-		assertThat(options1.hashCode()).isNotEqualTo(options3.hashCode());
+
+		// Null and different type
+		assertThat(options1).isNotEqualTo(null);
+		assertThat(options1).isNotEqualTo("not an options object");
 	}
 
 	@Test
-	void testChainedBuilderMethods() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model("test-model")
-			.maxTokens(150)
-			.temperature(0.6)
-			.topP(0.9)
-			.topK(40)
-			.stopSequences(List.of("stop"))
-			.metadata(new Metadata("user_456"))
-			.toolContext(Map.of("context", "value"))
-			.build();
+	void testToolCallbacksValidationRejectsNull() {
+		AnthropicChatOptions options = new AnthropicChatOptions();
 
-		// Verify all chained methods worked
-		assertThat(options.getModel()).isEqualTo("test-model");
-		assertThat(options.getMaxTokens()).isEqualTo(150);
-		assertThat(options.getTemperature()).isEqualTo(0.6);
-		assertThat(options.getTopP()).isEqualTo(0.9);
-		assertThat(options.getTopK()).isEqualTo(40);
-		assertThat(options.getStopSequences()).containsExactly("stop");
-		assertThat(options.getMetadata()).isEqualTo(new Metadata("user_456"));
-		assertThat(options.getToolContext()).containsEntry("context", "value");
+		assertThatThrownBy(() -> options.setToolCallbacks(null)).isInstanceOf(IllegalArgumentException.class)
+			.hasMessageContaining("toolCallbacks cannot be null");
 	}
 
 	@Test
-	void testSettersWithNullValues() {
+	void testToolNamesValidationRejectsNull() {
 		AnthropicChatOptions options = new AnthropicChatOptions();
 
-		options.setModel(null);
-		options.setMaxTokens(null);
-		options.setTemperature(null);
-		options.setTopK(null);
-		options.setTopP(null);
-		options.setStopSequences(null);
-		options.setMetadata(null);
-		options.setToolContext(null);
-
-		assertThat(options.getModel()).isNull();
-		assertThat(options.getMaxTokens()).isNull();
-		assertThat(options.getTemperature()).isNull();
-		assertThat(options.getTopK()).isNull();
-		assertThat(options.getTopP()).isNull();
-		assertThat(options.getStopSequences()).isNull();
-		assertThat(options.getMetadata()).isNull();
-		assertThat(options.getToolContext()).isNull();
-	}
-
-	@Test
-	void testBuilderAndSetterConsistency() {
-		// Build an object using builder
-		AnthropicChatOptions builderOptions = AnthropicChatOptions.builder()
-			.model("test-model")
-			.maxTokens(100)
-			.temperature(0.7)
-			.topP(0.8)
-			.topK(50)
-			.build();
-
-		// Create equivalent object using setters
-		AnthropicChatOptions setterOptions = new AnthropicChatOptions();
-		setterOptions.setModel("test-model");
-		setterOptions.setMaxTokens(100);
-		setterOptions.setTemperature(0.7);
-		setterOptions.setTopP(0.8);
-		setterOptions.setTopK(50);
-
-		assertThat(builderOptions).isEqualTo(setterOptions);
+		assertThatThrownBy(() -> options.setToolNames(null)).isInstanceOf(IllegalArgumentException.class)
+			.hasMessageContaining("toolNames cannot be null");
 	}
 
 	@Test
-	void testMetadataEquality() {
-		Metadata metadata1 = new Metadata("user_123");
-		Metadata metadata2 = new Metadata("user_123");
-		Metadata metadata3 = new Metadata("user_456");
-
-		AnthropicChatOptions options1 = AnthropicChatOptions.builder().metadata(metadata1).build();
+	void testDefaultConstants() {
+		assertThat(AnthropicChatOptions.DEFAULT_MODEL).isEqualTo("claude-haiku-4-5");
+		assertThat(AnthropicChatOptions.DEFAULT_MAX_TOKENS).isEqualTo(4096);
+	}
 
-		AnthropicChatOptions options2 = AnthropicChatOptions.builder().metadata(metadata2).build();
+	@Test
+	void testUnsupportedPenaltyMethodsReturnNull() {
+		AnthropicChatOptions options = new AnthropicChatOptions();
 
-		AnthropicChatOptions options3 = AnthropicChatOptions.builder().metadata(metadata3).build();
+		// Anthropic API does not support these OpenAI-specific parameters
+		assertThat(options.getFrequencyPenalty()).isNull();
+		assertThat(options.getPresencePenalty()).isNull();
+	}
 
-		assertThat(options1).isEqualTo(options2);
-		assertThat(options1).isNotEqualTo(options3);
+	@Test
+	void testImplementsStructuredOutputChatOptions() {
+		AnthropicChatOptions options = AnthropicChatOptions.builder().build();
+		assertThat(options).isInstanceOf(StructuredOutputChatOptions.class);
 	}
 
 	@Test
-	void testZeroValues() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.maxTokens(0)
-			.temperature(0.0)
-			.topP(0.0)
-			.topK(0)
-			.build();
+	void testOutputSchemaRoundTrip() {
+		String schema = "{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"}},\"required\":[\"name\"]}";
 
-		assertThat(options.getMaxTokens()).isEqualTo(0);
-		assertThat(options.getTemperature()).isEqualTo(0.0);
-		assertThat(options.getTopP()).isEqualTo(0.0);
-		assertThat(options.getTopK()).isEqualTo(0);
+		AnthropicChatOptions options = AnthropicChatOptions.builder().outputSchema(schema).build();
+
+		assertThat(options.getOutputSchema()).isNotNull();
+		assertThat(options.getOutputConfig()).isNotNull();
+		assertThat(options.getOutputConfig().format()).isPresent();
+
+		// Verify round-trip: the schema should parse and serialize back
+		String roundTripped = options.getOutputSchema();
+		assertThat(roundTripped).contains("\"type\"");
+		assertThat(roundTripped).contains("\"properties\"");
+		assertThat(roundTripped).contains("\"name\"");
+		assertThat(roundTripped).contains("\"required\"");
 	}
 
 	@Test
-	void testCopyPreservesAllFields() {
-		AnthropicChatOptions original = AnthropicChatOptions.builder()
-			.model("comprehensive-model")
-			.maxTokens(500)
-			.stopSequences(List.of("stop1", "stop2", "stop3"))
-			.temperature(0.75)
-			.topP(0.85)
-			.topK(60)
-			.metadata(new Metadata("comprehensive_test"))
-			.toolContext(Map.of("key1", "value1", "key2", "value2"))
-			.build();
-
-		AnthropicChatOptions copied = original.copy();
+	void testEffortConfiguration() {
+		AnthropicChatOptions options = AnthropicChatOptions.builder().effort(OutputConfig.Effort.HIGH).build();
 
-		// Verify all fields are preserved
-		assertThat(copied.getModel()).isEqualTo(original.getModel());
-		assertThat(copied.getMaxTokens()).isEqualTo(original.getMaxTokens());
-		assertThat(copied.getStopSequences()).isEqualTo(original.getStopSequences());
-		assertThat(copied.getTemperature()).isEqualTo(original.getTemperature());
-		assertThat(copied.getTopP()).isEqualTo(original.getTopP());
-		assertThat(copied.getTopK()).isEqualTo(original.getTopK());
-		assertThat(copied.getMetadata()).isEqualTo(original.getMetadata());
-		assertThat(copied.getToolContext()).isEqualTo(original.getToolContext());
-
-		// Ensure deep copy for collections
-		assertThat(copied.getStopSequences()).isNotSameAs(original.getStopSequences());
-		assertThat(copied.getToolContext()).isNotSameAs(original.getToolContext());
+		assertThat(options.getOutputConfig()).isNotNull();
+		assertThat(options.getOutputConfig().effort()).isPresent();
+		assertThat(options.getOutputConfig().effort().get()).isEqualTo(OutputConfig.Effort.HIGH);
+		// No format set, so outputSchema should be null
+		assertThat(options.getOutputSchema()).isNull();
 	}
 
 	@Test
-	void testBoundaryValues() {
+	void testOutputConfigWithEffortAndSchema() {
+		String schema = "{\"type\":\"object\",\"properties\":{\"result\":{\"type\":\"string\"}}}";
+
 		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.maxTokens(Integer.MAX_VALUE)
-			.temperature(1.0)
-			.topP(1.0)
-			.topK(Integer.MAX_VALUE)
+			.effort(OutputConfig.Effort.HIGH)
+			.outputSchema(schema)
 			.build();
 
-		assertThat(options.getMaxTokens()).isEqualTo(Integer.MAX_VALUE);
-		assertThat(options.getTemperature()).isEqualTo(1.0);
-		assertThat(options.getTopP()).isEqualTo(1.0);
-		assertThat(options.getTopK()).isEqualTo(Integer.MAX_VALUE);
+		assertThat(options.getOutputConfig()).isNotNull();
+		assertThat(options.getOutputConfig().effort()).isPresent();
+		assertThat(options.getOutputConfig().effort().get()).isEqualTo(OutputConfig.Effort.HIGH);
+		assertThat(options.getOutputConfig().format()).isPresent();
+		assertThat(options.getOutputSchema()).contains("result");
 	}
 
 	@Test
-	void testToolContextWithVariousValueTypes() {
-		Map<String, Object> mixedMap = Map.of("string", "value", "number", 42, "boolean", true, "null_value", "null",
-				"nested_list", List.of("a", "b", "c"), "nested_map", Map.of("inner", "value"));
+	void testOutputConfigDirectBuilder() {
+		OutputConfig outputConfig = OutputConfig.builder()
+			.effort(OutputConfig.Effort.MEDIUM)
+			.format(JsonOutputFormat.builder()
+				.schema(JsonOutputFormat.Schema.builder()
+					.putAdditionalProperty("type", JsonValue.from("object"))
+					.build())
+				.build())
+			.build();
 
-		AnthropicChatOptions options = AnthropicChatOptions.builder().toolContext(mixedMap).build();
+		AnthropicChatOptions options = AnthropicChatOptions.builder().outputConfig(outputConfig).build();
 
-		assertThat(options.getToolContext()).containsAllEntriesOf(mixedMap);
-		assertThat(options.getToolContext().get("string")).isEqualTo("value");
-		assertThat(options.getToolContext().get("number")).isEqualTo(42);
-		assertThat(options.getToolContext().get("boolean")).isEqualTo(true);
+		assertThat(options.getOutputConfig()).isNotNull();
+		assertThat(options.getOutputConfig().effort()).isPresent();
+		assertThat(options.getOutputConfig().format()).isPresent();
+		assertThat(options.getOutputSchema()).contains("object");
 	}
 
 	@Test
-	void testCopyWithMutableCollections() {
-		List<String> mutableStops = new java.util.ArrayList<>(List.of("stop1", "stop2"));
-		Map<String, Object> mutableContext = new java.util.HashMap<>(Map.of("key", "value"));
+	void testCombineWithPreservesOutputConfig() {
+		OutputConfig outputConfig = OutputConfig.builder().effort(OutputConfig.Effort.MEDIUM).build();
 
-		AnthropicChatOptions original = AnthropicChatOptions.builder()
-			.stopSequences(mutableStops)
-			.toolContext(mutableContext)
-			.build();
+		AnthropicChatOptions base = AnthropicChatOptions.builder().model("base-model").build();
 
-		AnthropicChatOptions copied = original.copy();
+		AnthropicChatOptions override = AnthropicChatOptions.builder().outputConfig(outputConfig).build();
 
-		// Modify original collections
-		mutableStops.add("stop3");
-		mutableContext.put("new_key", "new_value");
+		AnthropicChatOptions merged = base.mutate().combineWith(override.mutate()).build();
 
-		// Copied instance should not be affected
-		assertThat(copied.getStopSequences()).hasSize(2);
-		assertThat(copied.getToolContext()).hasSize(1);
-		assertThat(copied.getStopSequences()).doesNotContain("stop3");
-		assertThat(copied.getToolContext()).doesNotContainKey("new_key");
+		assertThat(merged.getModel()).isEqualTo("base-model");
+		assertThat(merged.getOutputConfig()).isNotNull();
+		assertThat(merged.getOutputConfig().effort()).isPresent();
+		assertThat(merged.getOutputConfig().effort().get()).isEqualTo(OutputConfig.Effort.MEDIUM);
 	}
 
 	@Test
-	void testEqualsWithNullFields() {
-		AnthropicChatOptions options1 = new AnthropicChatOptions();
-		AnthropicChatOptions options2 = new AnthropicChatOptions();
+	void testOutputConfigNullSchemaResetsConfig() {
+		AnthropicChatOptions options = AnthropicChatOptions.builder().outputSchema("{\"type\":\"object\"}").build();
+		assertThat(options.getOutputConfig()).isNotNull();
 
-		assertThat(options1).isEqualTo(options2);
-		assertThat(options1.hashCode()).isEqualTo(options2.hashCode());
+		options.setOutputSchema(null);
+		assertThat(options.getOutputConfig()).isNull();
+		assertThat(options.getOutputSchema()).isNull();
 	}
 
 	@Test
-	void testEqualsWithMixedNullAndNonNullFields() {
-		AnthropicChatOptions options1 = AnthropicChatOptions.builder()
-			.model("test")
-			.maxTokens(null)
-			.temperature(0.5)
-			.build();
+	void testHttpHeadersBuilder() {
+		Map<String, String> headers = Map.of("X-Custom-Header", "value1", "X-Request-Id", "req-123");
 
-		AnthropicChatOptions options2 = AnthropicChatOptions.builder()
-			.model("test")
-			.maxTokens(null)
-			.temperature(0.5)
-			.build();
+		AnthropicChatOptions options = AnthropicChatOptions.builder().httpHeaders(headers).build();
 
-		AnthropicChatOptions options3 = AnthropicChatOptions.builder()
-			.model("test")
-			.maxTokens(100)
-			.temperature(0.5)
-			.build();
-
-		assertThat(options1).isEqualTo(options2);
-		assertThat(options1).isNotEqualTo(options3);
+		assertThat(options.getHttpHeaders()).containsEntry("X-Custom-Header", "value1");
+		assertThat(options.getHttpHeaders()).containsEntry("X-Request-Id", "req-123");
 	}
 
 	@Test
-	void testCopyDoesNotShareMetadataReference() {
-		Metadata originalMetadata = new Metadata("user_123");
-		AnthropicChatOptions original = AnthropicChatOptions.builder().metadata(originalMetadata).build();
+	void testHttpHeadersDefaultEmpty() {
+		AnthropicChatOptions options = AnthropicChatOptions.builder().build();
+		assertThat(options.getHttpHeaders()).isNotNull().isEmpty();
+	}
 
-		AnthropicChatOptions copied = original.copy();
+	@Test
+	void testHttpHeadersCopiedInMutate() {
+		Map<String, String> headers = new HashMap<>(Map.of("X-Custom", "value"));
 
-		// Metadata should be the same value but potentially different reference
-		assertThat(copied.getMetadata()).isEqualTo(original.getMetadata());
+		AnthropicChatOptions original = AnthropicChatOptions.builder().httpHeaders(headers).build();
 
-		// Verify changing original doesn't affect copy
-		original.setMetadata(new Metadata("different_user"));
-		assertThat(copied.getMetadata()).isEqualTo(originalMetadata);
-	}
+		AnthropicChatOptions copied = original.mutate().build();
 
-	@Test
-	@SuppressWarnings("SelfAssertion")
-	void testEqualsWithSelf() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().model("test").build();
+		assertThat(copied.getHttpHeaders()).containsEntry("X-Custom", "value");
 
-		assertThat(options).isEqualTo(options);
-		assertThat(options.hashCode()).isEqualTo(options.hashCode());
+		// Verify deep copy — modifying original doesn't affect copy
+		original.getHttpHeaders().put("X-New", "new-value");
+		assertThat(copied.getHttpHeaders()).doesNotContainKey("X-New");
 	}
 
 	@Test
-	void testEqualsWithNull() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().model("test").build();
+	void testCombineWithPreservesHttpHeaders() {
+		AnthropicChatOptions base = AnthropicChatOptions.builder().httpHeaders(Map.of("X-Base", "base-value")).build();
 
-		assertThat(options).isNotEqualTo(null);
-	}
+		AnthropicChatOptions override = AnthropicChatOptions.builder()
+			.httpHeaders(Map.of("X-Override", "override-value"))
+			.build();
 
-	@Test
-	void testEqualsWithDifferentClass() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().model("test").build();
+		AnthropicChatOptions merged = base.mutate().combineWith(override.mutate()).build();
 
-		assertThat(options).isNotEqualTo("not an AnthropicChatOptions");
-		assertThat(options).isNotEqualTo(1);
+		// Override's non-empty headers replace base
+		assertThat(merged.getHttpHeaders()).containsEntry("X-Override", "override-value");
+		assertThat(merged.getHttpHeaders()).doesNotContainKey("X-Base");
 	}
 
 	@Test
-	void testBuilderPartialConfiguration() {
-		// Test builder with only some fields set
-		AnthropicChatOptions onlyModel = AnthropicChatOptions.builder().model("model-only").build();
-
-		AnthropicChatOptions onlyTokens = AnthropicChatOptions.builder().maxTokens(10).build();
-
-		AnthropicChatOptions onlyTemperature = AnthropicChatOptions.builder().temperature(0.8).build();
+	void testCombineWithEmptyHttpHeadersDoNotOverride() {
+		AnthropicChatOptions base = AnthropicChatOptions.builder().httpHeaders(Map.of("X-Base", "base-value")).build();
 
-		assertThat(onlyModel.getModel()).isEqualTo("model-only");
-		assertThat(onlyModel.getMaxTokens()).isNull();
+		AnthropicChatOptions override = AnthropicChatOptions.builder().build();
 
-		assertThat(onlyTokens.getModel()).isNull();
-		assertThat(onlyTokens.getMaxTokens()).isEqualTo(10);
+		AnthropicChatOptions merged = base.mutate().combineWith(override.mutate()).build();
 
-		assertThat(onlyTemperature.getModel()).isNull();
-		assertThat(onlyTemperature.getTemperature()).isEqualTo(0.8);
+		// Base headers preserved when override is empty
+		assertThat(merged.getHttpHeaders()).containsEntry("X-Base", "base-value");
 	}
 
 	@Test
-	void testSetterOverwriteBehavior() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().model("initial-model").maxTokens(100).build();
+	void testHttpHeadersInEqualsAndHashCode() {
+		AnthropicChatOptions options1 = AnthropicChatOptions.builder().httpHeaders(Map.of("X-Header", "value")).build();
 
-		// Overwrite with setters
-		options.setModel("updated-model");
-		options.setMaxTokens(10);
+		AnthropicChatOptions options2 = AnthropicChatOptions.builder().httpHeaders(Map.of("X-Header", "value")).build();
+
+		AnthropicChatOptions options3 = AnthropicChatOptions.builder()
+			.httpHeaders(Map.of("X-Header", "different"))
+			.build();
 
-		assertThat(options.getModel()).isEqualTo("updated-model");
-		assertThat(options.getMaxTokens()).isEqualTo(10);
+		assertThat(options1).isEqualTo(options2);
+		assertThat(options1.hashCode()).isEqualTo(options2.hashCode());
+		assertThat(options1).isNotEqualTo(options3);
 	}
 
 	@Test
-	void testCacheStrategyBuilder() {
-		var cacheOptions = AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build();
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model("test-model")
-			.cacheOptions(cacheOptions)
+	void testCitationConsistencyValidationPasses() {
+		AnthropicCitationDocument doc1 = AnthropicCitationDocument.builder()
+			.plainText("Text 1")
+			.title("Doc 1")
+			.citationsEnabled(true)
+			.build();
+		AnthropicCitationDocument doc2 = AnthropicCitationDocument.builder()
+			.plainText("Text 2")
+			.title("Doc 2")
+			.citationsEnabled(true)
 			.build();
-		assertThat(options.getCacheOptions().getStrategy()).isEqualTo(AnthropicCacheStrategy.SYSTEM_AND_TOOLS);
-	}
 
-	@Test
-	void testCacheStrategyDefaultValue() {
-		AnthropicChatOptions options = new AnthropicChatOptions();
-		assertThat(options.getCacheOptions().getStrategy()).isEqualTo(AnthropicCacheStrategy.NONE);
-		assertThat(options.getCacheOptions().getMessageTypeTtl().values())
-			.allMatch(ttl -> ttl == AnthropicCacheTtl.FIVE_MINUTES);
+		// Should not throw — all documents have consistent citation settings
+		AnthropicChatOptions options = AnthropicChatOptions.builder().citationDocuments(doc1, doc2).build();
+
+		assertThat(options.getCitationDocuments()).hasSize(2);
 	}
 
 	@Test
-	void testCacheStrategyEqualsAndHashCode() {
-		var sharedCacheOptions = AnthropicCacheOptions.builder()
-			.strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
-			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
-			.build();
-		AnthropicChatOptions options1 = AnthropicChatOptions.builder()
-			.model("test-model")
-			.cacheOptions(sharedCacheOptions)
+	void testCitationConsistencyValidationFailsOnMixed() {
+		AnthropicCitationDocument enabled = AnthropicCitationDocument.builder()
+			.plainText("Text 1")
+			.title("Doc 1")
+			.citationsEnabled(true)
 			.build();
-		AnthropicChatOptions options2 = AnthropicChatOptions.builder()
-			.model("test-model")
-			.cacheOptions(sharedCacheOptions)
-			.build();
-		var differentCacheOptions = AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build();
-		AnthropicChatOptions options3 = AnthropicChatOptions.builder()
-			.model("test-model")
-			.cacheOptions(differentCacheOptions)
+		AnthropicCitationDocument disabled = AnthropicCitationDocument.builder()
+			.plainText("Text 2")
+			.title("Doc 2")
+			.citationsEnabled(false)
 			.build();
 
-		assertThat(options1).isEqualTo(options2);
-		assertThat(options1.hashCode()).isEqualTo(options2.hashCode());
-		assertThat(options1).isNotEqualTo(options3);
-		assertThat(options1.hashCode()).isNotEqualTo(options3.hashCode());
+		assertThatThrownBy(() -> AnthropicChatOptions.builder().citationDocuments(enabled, disabled).build())
+			.isInstanceOf(IllegalArgumentException.class)
+			.hasMessageContaining("consistent citation settings");
 	}
 
 	@Test
-	void testCacheStrategyCopy() {
-		var cacheOptions = AnthropicCacheOptions.builder()
-			.strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
-			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
-			.build();
-		AnthropicChatOptions original = AnthropicChatOptions.builder()
-			.model("test-model")
-			.cacheOptions(cacheOptions)
-			.build();
+	void testCitationConsistencyValidationSkipsEmpty() {
+		// Should not throw — no documents
+		AnthropicChatOptions options = AnthropicChatOptions.builder().build();
+		assertThat(options.getCitationDocuments()).isEmpty();
+	}
 
-		AnthropicChatOptions copied = original.copy();
+	@Test
+	void testSkillBuilderWithStringId() {
+		AnthropicChatOptions options = AnthropicChatOptions.builder().skill("xlsx").build();
 
-		assertThat(copied).isNotSameAs(original).isEqualTo(original);
-		assertThat(copied.getCacheOptions().getStrategy()).isEqualTo(original.getCacheOptions().getStrategy());
-		assertThat(copied.getCacheOptions().getMessageTypeTtl())
-			.isEqualTo(original.getCacheOptions().getMessageTypeTtl());
+		assertThat(options.getSkillContainer()).isNotNull();
+		assertThat(options.getSkillContainer().getSkills()).hasSize(1);
+		assertThat(options.getSkillContainer().getSkills().get(0).getSkillId()).isEqualTo("xlsx");
+		assertThat(options.getSkillContainer().getSkills().get(0).getType()).isEqualTo(AnthropicSkillType.ANTHROPIC);
+		assertThat(options.getSkillContainer().getSkills().get(0).getVersion()).isEqualTo("latest");
 	}
 
 	@Test
-	void testCacheStrategyWithDefaultValues() {
-		AnthropicChatOptions options = AnthropicChatOptions.builder().model("test-model").build();
-		assertThat(options.getCacheOptions().getStrategy()).isEqualTo(AnthropicCacheStrategy.NONE);
-		assertThat(options.getCacheOptions().getMessageTypeTtl().values())
-			.allMatch(ttl -> ttl == AnthropicCacheTtl.FIVE_MINUTES);
+	void testSkillBuilderWithEnum() {
+		AnthropicChatOptions options = AnthropicChatOptions.builder().skill(AnthropicSkill.PPTX).build();
+
+		assertThat(options.getSkillContainer()).isNotNull();
+		assertThat(options.getSkillContainer().getSkills().get(0).getSkillId()).isEqualTo("pptx");
+		assertThat(options.getSkillContainer().getSkills().get(0).getType()).isEqualTo(AnthropicSkillType.ANTHROPIC);
 	}
 
 	@Test
-	void testBuilderWithAllFieldsIncludingCacheStrategy() {
-		var cacheOptions = AnthropicCacheOptions.builder()
-			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
-			.build();
+	void testMultipleSkills() {
 		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model("test-model")
-			.maxTokens(100)
-			.stopSequences(List.of("stop1", "stop2"))
-			.temperature(0.7)
-			.topP(0.8)
-			.topK(50)
-			.metadata(new Metadata("userId_123"))
-			.cacheOptions(cacheOptions)
+			.skill(AnthropicSkill.XLSX)
+			.skill(AnthropicSkill.PPTX)
 			.build();
 
-		assertThat(options).extracting("model", "maxTokens", "stopSequences", "temperature", "topP", "topK", "metadata")
-			.containsExactly("test-model", 100, List.of("stop1", "stop2"), 0.7, 0.8, 50, new Metadata("userId_123"));
-		assertThat(options.getCacheOptions().getStrategy()).isEqualTo(AnthropicCacheStrategy.SYSTEM_ONLY);
-		assertThat(options.getCacheOptions().getMessageTypeTtl().get(MessageType.SYSTEM))
-			.isEqualTo(AnthropicCacheTtl.ONE_HOUR);
+		assertThat(options.getSkillContainer()).isNotNull();
+		// Exactly two skills are expected, in the order they were added to the builder.
+		assertThat(options.getSkillContainer().getSkills()).extracting(skill -> skill.getSkillId())
+			.containsExactly("xlsx", "pptx");
 	}
 
 	@Test
-	void testCacheStrategyMutationDoesNotAffectOriginal() {
-		var originalCacheOptions = AnthropicCacheOptions.builder()
-			.strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
-			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
-			.build();
+	void testSkillContainerCopiedInMutate() {
 		AnthropicChatOptions original = AnthropicChatOptions.builder()
-			.model("original-model")
-			.cacheOptions(originalCacheOptions)
+			.skill(AnthropicSkill.XLSX)
+			.skill(AnthropicSkill.PDF)
 			.build();
 
-		AnthropicChatOptions copy = original.copy();
-		var modifiedCacheOptions = AnthropicCacheOptions.builder()
-			.strategy(AnthropicCacheStrategy.NONE)
-			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.FIVE_MINUTES)
-			.build();
-		copy.setCacheOptions(modifiedCacheOptions);
-
-		// Original should remain unchanged
-		assertThat(original.getCacheOptions().getStrategy()).isEqualTo(AnthropicCacheStrategy.SYSTEM_AND_TOOLS);
-		assertThat(original.getCacheOptions().getMessageTypeTtl().get(MessageType.SYSTEM))
-			.isEqualTo(AnthropicCacheTtl.ONE_HOUR);
-
-		// Copy should have modified values
-		assertThat(copy.getCacheOptions().getStrategy()).isEqualTo(AnthropicCacheStrategy.NONE);
-		assertThat(copy.getCacheOptions().getMessageTypeTtl().get(MessageType.SYSTEM))
-			.isEqualTo(AnthropicCacheTtl.FIVE_MINUTES);
-	}
-
-	@Test
-	void testStructuredOutputSchema() {
-		String outputSchema = """
-				{
-					"$schema": "https://json-schema.org/draft/2020-12/schema",
-					"type": "object",
-					"properties": {
-						"name": {
-							"type": "string"
-						},
-						"required": [
-							"name"
-						]
-					}
-				}
-				""";
-		var options = AnthropicChatOptions.builder().outputSchema(outputSchema).build();
-
-		assertThat(options.getOutputFormat()).isNotNull();
-		assertThat(options.getOutputFormat().type()).isEqualTo("json_schema");
-		assertThat(options.getOutputFormat().type()).isEqualTo("json_schema");
-		assertThat(options.getOutputFormat().schema()).containsOnly(
-				Assertions.entry("$schema", "https://json-schema.org/draft/2020-12/schema"),
-				Assertions.entry("type", "object"),
-				Assertions.entry("properties", Map.of("name", Map.of("type", "string"), "required", List.of("name"))));
-
-		JsonAssertions.assertThatJson(options.getOutputSchema()).isEqualTo(outputSchema);
+		// A mutate()/build() round trip is expected to carry both skills over, in order.
+		AnthropicChatOptions rebuilt = original.mutate().build();
+		assertThat(rebuilt.getSkillContainer()).isNotNull();
+		assertThat(rebuilt.getSkillContainer().getSkills()).hasSize(2);
+		assertThat(rebuilt.getSkillContainer().getSkills().get(0).getSkillId()).isEqualTo("xlsx");
+		assertThat(rebuilt.getSkillContainer().getSkills().get(1).getSkillId()).isEqualTo("pdf");
 	}
 
-	@Override
-	protected Class<AnthropicChatOptions> getConcreteOptionsClass() {
-		return AnthropicChatOptions.class;
+	@Test
+	void testCombineWithPreservesSkillContainer() {
+		// One side sets only the model, the other sets only a skill.
+		AnthropicChatOptions modelOnly = AnthropicChatOptions.builder().model("base-model").build();
+		AnthropicChatOptions skillOnly = AnthropicChatOptions.builder().skill(AnthropicSkill.DOCX).build();
+
+		AnthropicChatOptions combined = modelOnly.mutate().combineWith(skillOnly.mutate()).build();
+
+		assertThat(combined.getModel()).isEqualTo("base-model");
+		assertThat(combined.getSkillContainer()).isNotNull();
+		assertThat(combined.getSkillContainer().getSkills()).hasSize(1);
+		assertThat(combined.getSkillContainer().getSkills().get(0).getSkillId()).isEqualTo("docx");
 	}
 
-	@Override
-	@SuppressWarnings("unchecked")
-	protected B readyToBuildBuilder() {
-		return (B) AnthropicChatOptions.builder().model(ChatModel.CLAUDE_HAIKU_4_5).maxTokens(500);
+	@Test
+	void testSkillContainerDefaultIsNull() {
+		// No skill was configured on the builder, so no container is expected.
+		assertThat(AnthropicChatOptions.builder().build().getSkillContainer()).isNull();
 	}
 
 }
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicCitationIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicCitationIT.java
deleted file mode 100644
index fb3df5b13a2..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicCitationIT.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.CitationDocument;
-import org.springframework.ai.chat.messages.UserMessage;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.chat.prompt.Prompt;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.SpringBootConfiguration;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.context.annotation.Bean;
-import org.springframework.util.StringUtils;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * Integration tests for Anthropic Citations API support.
- *
- * @author Soby Chacko
- * @since 1.1.0
- */
-@SpringBootTest(classes = AnthropicCitationIT.Config.class)
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-class AnthropicCitationIT {
-
-	private static final Logger logger = LoggerFactory.getLogger(AnthropicCitationIT.class);
-
-	@Autowired
-	private AnthropicChatModel chatModel;
-
-	@Test
-	void testPlainTextCitation() {
-		// Create a citation document with plain text
-		CitationDocument document = CitationDocument.builder()
-			.plainText(
-					"The Eiffel Tower is located in Paris, France. It was completed in 1889 and stands 330 meters tall.")
-			.title("Eiffel Tower Facts")
-			.citationsEnabled(true)
-			.build();
-
-		// Create a prompt asking a question about the document
-		// Use explicit instruction to answer from the provided document
-		UserMessage userMessage = new UserMessage(
-				"Based solely on the provided document, where is the Eiffel Tower located and when was it completed?");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
-			.maxTokens(2048)
-			.temperature(0.0) // Use temperature 0 for more deterministic responses
-			.citationDocuments(document)
-			.build();
-
-		Prompt prompt = new Prompt(List.of(userMessage), options);
-
-		// Call the model
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify response exists and is not empty
-		assertThat(response).isNotNull();
-		assertThat(response.getResults()).isNotEmpty();
-		String responseText = response.getResult().getOutput().getText();
-		assertThat(responseText).as("Response text should not be blank").isNotBlank();
-
-		// Verify citations are present in metadata (this is the core feature being
-		// tested)
-		Object citationsObj = response.getMetadata().get("citations");
-		assertThat(citationsObj).as("Citations should be present in response metadata").isNotNull();
-
-		@SuppressWarnings("unchecked")
-		List<Citation> citations = (List<Citation>) citationsObj;
-		assertThat(citations).as("Citation list should not be empty").isNotEmpty();
-
-		// Verify citation structure - all citations should have proper fields
-		for (Citation citation : citations) {
-			assertThat(citation.getType()).as("Citation type should be CHAR_LOCATION for plain text")
-				.isEqualTo(Citation.LocationType.CHAR_LOCATION);
-			assertThat(citation.getCitedText()).as("Cited text should not be blank").isNotBlank();
-			assertThat(citation.getDocumentIndex()).as("Document index should be 0 (first document)").isEqualTo(0);
-			assertThat(citation.getDocumentTitle()).as("Document title should match").isEqualTo("Eiffel Tower Facts");
-			assertThat(citation.getStartCharIndex()).as("Start char index should be non-negative")
-				.isGreaterThanOrEqualTo(0);
-			assertThat(citation.getEndCharIndex()).as("End char index should be greater than start")
-				.isGreaterThan(citation.getStartCharIndex());
-		}
-	}
-
-	@Test
-	void testMultipleCitationDocuments() {
-		// Create multiple citation documents
-		CitationDocument parisDoc = CitationDocument.builder()
-			.plainText("Paris is the capital city of France. It has a population of about 2.1 million people.")
-			.title("Paris Information")
-			.citationsEnabled(true)
-			.build();
-
-		CitationDocument eiffelDoc = CitationDocument.builder()
-			.plainText("The Eiffel Tower was designed by Gustave Eiffel and completed in 1889 for the World's Fair.")
-			.title("Eiffel Tower History")
-			.citationsEnabled(true)
-			.build();
-
-		// Use explicit instruction to answer from the provided documents
-		UserMessage userMessage = new UserMessage(
-				"Based solely on the provided documents, what is the capital of France and who designed the Eiffel Tower?");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
-			.maxTokens(1024)
-			.temperature(0.0) // Use temperature 0 for more deterministic responses
-			.citationDocuments(parisDoc, eiffelDoc)
-			.build();
-
-		Prompt prompt = new Prompt(List.of(userMessage), options);
-
-		// Call the model
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify response exists and is not empty
-		assertThat(response).isNotNull();
-		assertThat(response.getResults()).isNotEmpty();
-		String responseText = response.getResult().getOutput().getText();
-		assertThat(responseText).as("Response text should not be blank").isNotBlank();
-
-		// Verify citations are present (this is the core feature being tested)
-		Object citationsObj = response.getMetadata().get("citations");
-		assertThat(citationsObj).as("Citations should be present in response metadata").isNotNull();
-
-		@SuppressWarnings("unchecked")
-		List<Citation> citations = (List<Citation>) citationsObj;
-		assertThat(citations).as("Citation list should not be empty").isNotEmpty();
-
-		// Verify we have citations from both documents
-		// Check that citations reference both document indices (0 and 1)
-		boolean hasDoc0 = citations.stream().anyMatch(c -> c.getDocumentIndex() == 0);
-		boolean hasDoc1 = citations.stream().anyMatch(c -> c.getDocumentIndex() == 1);
-		assertThat(hasDoc0 && hasDoc1).as("Should have citations from at least one document").isTrue();
-
-		// Verify citation structure for all citations
-		for (Citation citation : citations) {
-			assertThat(citation.getType()).as("Citation type should be CHAR_LOCATION for plain text")
-				.isEqualTo(Citation.LocationType.CHAR_LOCATION);
-			assertThat(citation.getCitedText()).as("Cited text should not be blank").isNotBlank();
-			assertThat(citation.getDocumentIndex()).as("Document index should be 0 or 1").isIn(0, 1);
-			assertThat(citation.getDocumentTitle()).as("Document title should be one of the provided titles")
-				.isIn("Paris Information", "Eiffel Tower History");
-			assertThat(citation.getStartCharIndex()).as("Start char index should be non-negative")
-				.isGreaterThanOrEqualTo(0);
-			assertThat(citation.getEndCharIndex()).as("End char index should be greater than start")
-				.isGreaterThan(citation.getStartCharIndex());
-		}
-	}
-
-	@Test
-	void testCustomContentCitation() {
-		// Create a citation document with custom content blocks for fine-grained citation
-		// control
-		CitationDocument document = CitationDocument.builder()
-			.customContent("The Great Wall of China is approximately 21,196 kilometers long.",
-					"It was built over many centuries, starting in the 7th century BC.",
-					"The wall was constructed to protect Chinese states from invasions.")
-			.title("Great Wall Facts")
-			.citationsEnabled(true)
-			.build();
-
-		UserMessage userMessage = new UserMessage(
-				"Based solely on the provided document, how long is the Great Wall of China and when was it started?");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
-			.maxTokens(1024)
-			.temperature(0.0)
-			.citationDocuments(document)
-			.build();
-
-		Prompt prompt = new Prompt(List.of(userMessage), options);
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify response and citations
-		assertThat(response).isNotNull();
-		assertThat(response.getResults()).isNotEmpty();
-		assertThat(response.getResult().getOutput().getText()).isNotBlank();
-
-		Object citationsObj = response.getMetadata().get("citations");
-		assertThat(citationsObj).as("Citations should be present in response metadata").isNotNull();
-
-		@SuppressWarnings("unchecked")
-		List<Citation> citations = (List<Citation>) citationsObj;
-		assertThat(citations).as("Citation list should not be empty").isNotEmpty();
-
-		// For custom content, citations should use CONTENT_BLOCK_LOCATION type
-		for (Citation citation : citations) {
-			assertThat(citation.getType()).as("Citation type should be CONTENT_BLOCK_LOCATION for custom content")
-				.isEqualTo(Citation.LocationType.CONTENT_BLOCK_LOCATION);
-			assertThat(citation.getCitedText()).as("Cited text should not be blank").isNotBlank();
-			assertThat(citation.getDocumentIndex()).as("Document index should be 0").isEqualTo(0);
-			assertThat(citation.getDocumentTitle()).as("Document title should match").isEqualTo("Great Wall Facts");
-			// For content block citations, we have start/end block indices instead of
-			// char indices
-			assertThat(citation.getStartBlockIndex()).as("Start block index should be non-negative")
-				.isGreaterThanOrEqualTo(0);
-			assertThat(citation.getEndBlockIndex()).as("End block index should be >= start")
-				.isGreaterThanOrEqualTo(citation.getStartBlockIndex());
-		}
-	}
-
-	@Test
-	void testPdfCitation() throws IOException {
-		// Load the test PDF from resources
-		CitationDocument document = CitationDocument.builder()
-			.pdfFile("src/test/resources/spring-ai-reference-overview.pdf")
-			.title("Spring AI Reference")
-			.citationsEnabled(true)
-			.build();
-
-		UserMessage userMessage = new UserMessage("Based solely on the provided document, what is Spring AI?");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
-			.maxTokens(1024)
-			.temperature(0.0)
-			.citationDocuments(document)
-			.build();
-
-		Prompt prompt = new Prompt(List.of(userMessage), options);
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify response and citations
-		assertThat(response).isNotNull();
-		assertThat(response.getResults()).isNotEmpty();
-		assertThat(response.getResult().getOutput().getText()).isNotBlank();
-
-		Object citationsObj = response.getMetadata().get("citations");
-		assertThat(citationsObj).as("Citations should be present for PDF documents").isNotNull();
-
-		@SuppressWarnings("unchecked")
-		List<Citation> citations = (List<Citation>) citationsObj;
-		assertThat(citations).as("Citation list should not be empty for PDF").isNotEmpty();
-
-		// For PDF documents, citations should use PAGE_LOCATION type
-		for (Citation citation : citations) {
-			assertThat(citation.getType()).as("Citation type should be PAGE_LOCATION for PDF")
-				.isEqualTo(Citation.LocationType.PAGE_LOCATION);
-			assertThat(citation.getCitedText()).as("Cited text should not be blank").isNotBlank();
-			assertThat(citation.getDocumentIndex()).as("Document index should be 0").isEqualTo(0);
-			assertThat(citation.getDocumentTitle()).as("Document title should match").isEqualTo("Spring AI Reference");
-			// For page citations, we have start/end page numbers instead of char indices
-			assertThat(citation.getStartPageNumber()).as("Start page number should be positive").isGreaterThan(0);
-			assertThat(citation.getEndPageNumber()).as("End page number should be >= start")
-				.isGreaterThanOrEqualTo(citation.getStartPageNumber());
-		}
-	}
-
-	@SpringBootConfiguration
-	public static class Config {
-
-		@Bean
-		public AnthropicApi anthropicApi() {
-			return AnthropicApi.builder().apiKey(getApiKey()).build();
-		}
-
-		private String getApiKey() {
-			String apiKey = System.getenv("ANTHROPIC_API_KEY");
-			if (!StringUtils.hasText(apiKey)) {
-				throw new IllegalArgumentException(
-						"You must provide an API key. Put it in an environment variable under the name ANTHROPIC_API_KEY");
-			}
-			return apiKey;
-		}
-
-		@Bean
-		public AnthropicChatModel anthropicChatModel(AnthropicApi api) {
-			return AnthropicChatModel.builder().anthropicApi(api).build();
-		}
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingIT.java
deleted file mode 100644
index 95e38ba0a85..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingIT.java
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicCacheOptions;
-import org.springframework.ai.anthropic.api.AnthropicCacheStrategy;
-import org.springframework.ai.anthropic.api.AnthropicCacheTtl;
-import org.springframework.ai.anthropic.api.tool.MockWeatherService;
-import org.springframework.ai.chat.client.ChatClient;
-import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor;
-import org.springframework.ai.chat.memory.ChatMemory;
-import org.springframework.ai.chat.memory.InMemoryChatMemoryRepository;
-import org.springframework.ai.chat.memory.MessageWindowChatMemory;
-import org.springframework.ai.chat.messages.Message;
-import org.springframework.ai.chat.messages.MessageType;
-import org.springframework.ai.chat.messages.SystemMessage;
-import org.springframework.ai.chat.messages.UserMessage;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.chat.prompt.Prompt;
-import org.springframework.ai.tool.function.FunctionToolCallback;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.core.io.Resource;
-import org.springframework.core.io.ResourceLoader;
-import org.springframework.util.StreamUtils;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * Integration tests for Anthropic prompt caching functionality.
- *
- * Tests various caching strategies to ensure proper cache breakpoint placement and
- * optimal cache utilization according to Anthropic's best practices.
- *
- * @author Austin Dase
- */
-@SpringBootTest(classes = AnthropicTestConfiguration.class)
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-public class AnthropicPromptCachingIT {
-
-	private static final Logger logger = LoggerFactory.getLogger(AnthropicPromptCachingIT.class);
-
-	@Autowired
-	private AnthropicChatModel chatModel;
-
-	@Autowired
-	private ResourceLoader resourceLoader;
-
-	private String loadPrompt(String filename) {
-		try {
-			Resource resource = this.resourceLoader.getResource("classpath:prompts/" + filename);
-			String basePrompt = StreamUtils.copyToString(resource.getInputStream(), StandardCharsets.UTF_8);
-			// Add unique timestamp to prevent cache collisions across test runs
-			return basePrompt + "\n\nTest execution timestamp: " + System.currentTimeMillis();
-		}
-		catch (IOException e) {
-			throw new RuntimeException("Failed to load prompt: " + filename, e);
-		}
-	}
-
-	/**
-	 * Helper method to safely get AnthropicApi.Usage, returning null if not available.
-	 * This handles the case where getNativeUsage() returns null for tool-based
-	 * interactions.
-	 */
-	private AnthropicApi.Usage getAnthropicUsage(ChatResponse response) {
-		if (response == null || response.getMetadata() == null || response.getMetadata().getUsage() == null) {
-			return null;
-		}
-		Object nativeUsage = response.getMetadata().getUsage().getNativeUsage();
-		return (nativeUsage instanceof AnthropicApi.Usage usage) ? usage : null;
-	}
-
-	@Test
-	void shouldCacheSystemMessageOnly() {
-		String systemPrompt = loadPrompt("system-only-cache-prompt.txt");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
-			.maxTokens(150)
-			.temperature(0.3)
-			.build();
-
-		ChatResponse response = this.chatModel.call(new Prompt(
-				List.of(new SystemMessage(systemPrompt), new UserMessage("What is microservices architecture?")),
-				options));
-
-		assertThat(response).isNotNull();
-		assertThat(response.getResult().getOutput().getText()).isNotEmpty();
-		logger.info("System-only cache response: {}", response.getResult().getOutput().getText());
-
-		// For system-only caching, we should have native usage available
-		AnthropicApi.Usage usage = getAnthropicUsage(response);
-		assertThat(usage).isNotNull();
-
-		// Check cache behavior - either cache creation OR cache read should occur
-		boolean cacheCreated = usage.cacheCreationInputTokens() > 0;
-		boolean cacheRead = usage.cacheReadInputTokens() > 0;
-		assertThat(cacheCreated || cacheRead)
-			.withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d",
-					usage.cacheCreationInputTokens(), usage.cacheReadInputTokens())
-			.isTrue();
-		assertThat(cacheCreated && cacheRead)
-			.withFailMessage("Cache creation and read should not happen simultaneously")
-			.isFalse();
-
-		logger.info("Cache creation tokens: {}, Cache read tokens: {}", usage.cacheCreationInputTokens(),
-				usage.cacheReadInputTokens());
-	}
-
-	@Test
-	void shouldCacheSystemAndTools() {
-		String systemPrompt = loadPrompt("system-and-tools-cache-prompt.txt");
-
-		// Mock weather service
-		MockWeatherService weatherService = new MockWeatherService();
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build())
-			.maxTokens(200)
-			.temperature(0.3)
-			.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", weatherService)
-				.description("Get current weather for a location")
-				.inputType(MockWeatherService.Request.class)
-				.build())
-			.build();
-
-		ChatResponse response = this.chatModel.call(
-				new Prompt(
-						List.of(new SystemMessage(systemPrompt),
-								new UserMessage(
-										"What's the weather like in San Francisco and should I go for a walk?")),
-						options));
-
-		assertThat(response).isNotNull();
-		assertThat(response.getResult().getOutput().getText()).isNotEmpty();
-		logger.info("System and tools cache response: {}", response.getResult().getOutput().getText());
-
-		// Anthropic's API doesn't provide cache usage metadata for tool-based
-		// interactions
-		// Validate what we can: configuration works and tools are called successfully
-		AnthropicApi.Usage usage = getAnthropicUsage(response);
-		if (usage != null) {
-			// If we get usage metadata, validate cache behavior
-			boolean cacheCreated = usage.cacheCreationInputTokens() > 0;
-			boolean cacheRead = usage.cacheReadInputTokens() > 0;
-			assertThat(cacheCreated || cacheRead)
-				.withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d",
-						usage.cacheCreationInputTokens(), usage.cacheReadInputTokens())
-				.isTrue();
-			assertThat(cacheCreated && cacheRead)
-				.withFailMessage("Cache creation and read should not happen simultaneously")
-				.isFalse();
-
-			logger.info("Cache creation tokens: {}, Cache read tokens: {}", usage.cacheCreationInputTokens(),
-					usage.cacheReadInputTokens());
-		}
-		else {
-			logger.debug("Native usage metadata not available for tool-based interactions - this is expected");
-			// Validate functional correctness: tools were called and response generated
-			assertThat(response.getResult().getOutput().getText()).isNotEmpty();
-			// Ensure the weather service was actually called (indirect validation)
-			// Note: Full cache validation would require mocking the Anthropic API
-		}
-	}
-
-	@Test
-	void shouldCacheConversationHistory() {
-		// Create a conversation ID for this test
-		String conversationId = "history-cache-test-" + System.currentTimeMillis();
-
-		// Set up ChatMemory and advisor
-		ChatMemory chatMemory = MessageWindowChatMemory.builder()
-			.chatMemoryRepository(new InMemoryChatMemoryRepository())
-			.build();
-
-		MessageChatMemoryAdvisor advisor = MessageChatMemoryAdvisor.builder(chatMemory)
-			.conversationId(conversationId)
-			.build();
-
-		ChatClient chatClient = ChatClient.builder(this.chatModel)
-			.defaultAdvisors(advisor)
-			.defaultSystem(loadPrompt("conversation-history-cache-prompt.txt"))
-			.build();
-
-		// Build up conversation history
-		chatClient.prompt()
-			.user("My name is Alice and I work as a data scientist at TechCorp.")
-			.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId))
-			.call()
-			.content();
-
-		chatClient.prompt()
-			.user("I specialize in machine learning and have 5 years of experience with Python and R.")
-			.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId))
-			.call()
-			.content();
-
-		chatClient.prompt()
-			.user("Recently I've been working on a recommendation system for our e-commerce platform.")
-			.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId))
-			.call()
-			.content();
-
-		// Now use caching for the next conversation turn
-		String response = chatClient.prompt()
-			.user("What career advice would you give me based on our conversation?")
-			.options(AnthropicChatOptions.builder()
-				.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-				.cacheOptions(
-						AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build())
-				.maxTokens(200)
-				.temperature(0.3)
-				.build())
-			.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId))
-			.call()
-			.content();
-
-		assertThat(response).isNotEmpty();
-		assertThat(response.toLowerCase()).contains("alice");
-		logger.info("Conversation history cache response: {}", response);
-
-		// Verify the conversation was remembered
-		List<Message> memoryMessages = chatMemory.get(conversationId);
-		assertThat(memoryMessages).hasSizeGreaterThan(6); // At least 4 user + 4 assistant
-															// messages
-	}
-
-	@Test
-	void shouldRespectMinLengthForSystemCaching() {
-		String systemPrompt = loadPrompt("system-only-cache-prompt.txt");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder()
-				.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-				// Set min length above actual system prompt length to prevent caching
-				.messageTypeMinContentLength(MessageType.SYSTEM, systemPrompt.length() + 1)
-				.build())
-			.maxTokens(60)
-			.temperature(0.2)
-			.build();
-
-		ChatResponse response = this.chatModel
-			.call(new Prompt(List.of(new SystemMessage(systemPrompt), new UserMessage("Ping")), options));
-
-		assertThat(response).isNotNull();
-		AnthropicApi.Usage usage = getAnthropicUsage(response);
-		assertThat(usage).isNotNull();
-		assertThat(usage.cacheCreationInputTokens()).as("No cache should be created below min length").isEqualTo(0);
-		assertThat(usage.cacheReadInputTokens()).as("No cache read expected below min length").isEqualTo(0);
-	}
-
-	@Test
-	void shouldRespectMinLengthForUserHistoryCaching() {
-		// Two-user-message prompt; aggregate length check applies
-		String userMessage = loadPrompt("system-only-cache-prompt.txt");
-		String secondUserMessage = "Please answer this question succinctly";
-		List<Message> messages = List.of(new UserMessage(userMessage), new UserMessage(secondUserMessage));
-
-		// Calculate combined length of both messages for aggregate checking
-		int combinedLength = userMessage.length() + secondUserMessage.length();
-
-		// Set USER min length higher than combined length so caching should not apply
-		AnthropicChatOptions noCacheOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder()
-				.strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
-				.messageTypeMinContentLength(MessageType.USER, combinedLength + 1)
-				.build())
-			.maxTokens(80)
-			.temperature(0.2)
-			.build();
-
-		ChatResponse noCacheResponse = this.chatModel.call(new Prompt(messages, noCacheOptions));
-		assertThat(noCacheResponse).isNotNull();
-		AnthropicApi.Usage noCacheUsage = getAnthropicUsage(noCacheResponse);
-		assertThat(noCacheUsage).isNotNull();
-		assertThat(noCacheUsage.cacheCreationInputTokens()).isEqualTo(0);
-		assertThat(noCacheUsage.cacheReadInputTokens()).isEqualTo(0);
-
-		// Now allow caching by lowering the USER min length below combined length
-		AnthropicChatOptions cacheOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder()
-				.strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
-				.messageTypeMinContentLength(MessageType.USER, combinedLength - 1)
-				.build())
-			.maxTokens(80)
-			.temperature(0.2)
-			.build();
-
-		ChatResponse cacheResponse = this.chatModel.call(new Prompt(messages, cacheOptions));
-		assertThat(cacheResponse).isNotNull();
-		AnthropicApi.Usage cacheUsage = getAnthropicUsage(cacheResponse);
-		assertThat(cacheUsage).isNotNull();
-		assertThat(cacheUsage.cacheCreationInputTokens())
-			.as("Expect some cache creation tokens when aggregate content meets min length")
-			.isGreaterThan(0);
-	}
-
-	@Test
-	void shouldApplyCacheControlToLastUserMessageForConversationHistory() {
-		// Three-user-message prompt; the last user message will have cache_control.
-		String userMessage = loadPrompt("system-only-cache-prompt.txt");
-		List<Message> messages = List.of(new UserMessage(userMessage),
-				new UserMessage("Additional content to exceed min length"),
-				new UserMessage("Please answer this question succinctly"));
-
-		// The combined length of all three USER messages (including the last) exceeds
-		// the min length, so caching should apply
-		AnthropicChatOptions cacheOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder()
-				.strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
-				.messageTypeMinContentLength(MessageType.USER, userMessage.length())
-				.build())
-			.maxTokens(80)
-			.temperature(0.2)
-			.build();
-
-		ChatResponse cacheResponse = this.chatModel.call(new Prompt(messages, cacheOptions));
-		assertThat(cacheResponse).isNotNull();
-		AnthropicApi.Usage cacheUsage = getAnthropicUsage(cacheResponse);
-		assertThat(cacheUsage).isNotNull();
-		assertThat(cacheUsage.cacheCreationInputTokens())
-			.as("Expect some cache creation tokens when USER history tail is cached")
-			.isGreaterThan(0);
-	}
-
-	@Test
-	void shouldHandleExtendedTtlCaching() {
-		String systemPrompt = loadPrompt("extended-ttl-cache-prompt.txt");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder()
-				.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-				.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
-				.build())
-			.maxTokens(100)
-			.temperature(0.3)
-			.build();
-
-		ChatResponse response = this.chatModel
-			.call(new Prompt(List.of(new SystemMessage(systemPrompt), new UserMessage("What is 2+2?")), options));
-
-		assertThat(response).isNotNull();
-		assertThat(response.getResult().getOutput().getText()).contains("4");
-		logger.info("Extended TTL cache response: {}", response.getResult().getOutput().getText());
-
-		// Check cache behavior - either cache creation OR cache read should occur
-		logger.info("DEBUG: About to get usage metadata for extended TTL test");
-		AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage();
-		logger.info("DEBUG: Got usage metadata for extended TTL test: {}", usage);
-		assertThat(usage).isNotNull();
-
-		boolean cacheCreated = usage.cacheCreationInputTokens() > 0;
-		boolean cacheRead = usage.cacheReadInputTokens() > 0;
-		assertThat(cacheCreated || cacheRead)
-			.withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d",
-					usage.cacheCreationInputTokens(), usage.cacheReadInputTokens())
-			.isTrue();
-		assertThat(cacheCreated && cacheRead)
-			.withFailMessage("Cache creation and read should not happen simultaneously")
-			.isFalse();
-
-		logger.info("Extended TTL - Cache creation tokens: {}, Cache read tokens: {}", usage.cacheCreationInputTokens(),
-				usage.cacheReadInputTokens());
-	}
-
-	@Test
-	void shouldNotCacheWithNoneStrategy() {
-		String systemPrompt = "You are a helpful assistant.";
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build())
-			.maxTokens(50)
-			.temperature(0.3)
-			.build();
-
-		ChatResponse response = this.chatModel
-			.call(new Prompt(List.of(new SystemMessage(systemPrompt), new UserMessage("Hello!")), options));
-
-		assertThat(response).isNotNull();
-		assertThat(response.getResult().getOutput().getText()).isNotEmpty();
-		logger.info("No cache response: {}", response.getResult().getOutput().getText());
-
-		// Verify NO cache tokens are created (NONE strategy)
-		AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage();
-		assertThat(usage.cacheCreationInputTokens()).isEqualTo(0);
-		assertThat(usage.cacheReadInputTokens()).isEqualTo(0);
-		logger.info("No cache strategy - Cache creation tokens: {}, Cache read tokens: {}",
-				usage.cacheCreationInputTokens(), usage.cacheReadInputTokens());
-	}
-
-	@Test
-	void shouldHandleMultipleCacheStrategiesInSession() {
-		// Test that we can switch between different caching strategies
-		List<ChatResponse> responses = new ArrayList<>();
-
-		// First: System only
-		responses.add(this.chatModel.call(new Prompt(
-				List.of(new SystemMessage("You are a math tutor."), new UserMessage("What is calculus?")),
-				AnthropicChatOptions.builder()
-					.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-					.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
-					.maxTokens(100)
-					.build())));
-
-		// Second: No caching
-		responses.add(this.chatModel.call(new Prompt(List.of(new UserMessage("What's 5+5?")),
-				AnthropicChatOptions.builder()
-					.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-					.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build())
-					.maxTokens(50)
-					.build())));
-
-		// Verify all responses
-		for (int i = 0; i < responses.size(); i++) {
-			ChatResponse response = responses.get(i);
-			assertThat(response).isNotNull();
-			assertThat(response.getResult().getOutput().getText()).isNotEmpty();
-			logger.info("Response {}: {}", i + 1, response.getResult().getOutput().getText());
-		}
-	}
-
-	@Test
-	void shouldDemonstrateIncrementalCachingAcrossMultipleTurns() {
-		// This test demonstrates how caching grows incrementally with each turn
-		// NOTE: Anthropic requires 1024+ tokens for caching to activate
-		// We use a large system message to ensure we cross this threshold
-
-		// Large system prompt to ensure we exceed 1024 token minimum for caching
-		String largeSystemPrompt = loadPrompt("system-only-cache-prompt.txt");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_0.getValue())
-			.cacheOptions(AnthropicCacheOptions.builder()
-				.strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
-				// Disable min content length since we're using aggregate check
-				.messageTypeMinContentLength(MessageType.USER, 0)
-				.build())
-			.maxTokens(200)
-			.temperature(0.3)
-			.build();
-
-		List<Message> conversationHistory = new ArrayList<>();
-		// Add system message to provide enough tokens for caching
-		conversationHistory.add(new SystemMessage(largeSystemPrompt));
-
-		// Turn 1: Initial question
-		logger.info("\n=== TURN 1: Initial Question ===");
-		conversationHistory.add(new UserMessage("What is quantum computing? Please explain the basics."));
-
-		ChatResponse turn1 = this.chatModel.call(new Prompt(conversationHistory, options));
-		assertThat(turn1).isNotNull();
-		String assistant1Response = turn1.getResult().getOutput().getText();
-		conversationHistory.add(turn1.getResult().getOutput());
-
-		AnthropicApi.Usage usage1 = getAnthropicUsage(turn1);
-		assertThat(usage1).isNotNull();
-		logger.info("Turn 1 - User: '{}'", conversationHistory.get(0).getText().substring(0, 50) + "...");
-		logger.info("Turn 1 - Assistant: '{}'",
-				assistant1Response.substring(0, Math.min(100, assistant1Response.length())) + "...");
-		logger.info("Turn 1 - Input tokens: {}", usage1.inputTokens());
-		logger.info("Turn 1 - Cache creation tokens: {}", usage1.cacheCreationInputTokens());
-		logger.info("Turn 1 - Cache read tokens: {}", usage1.cacheReadInputTokens());
-
-		// Note: First turn may not create cache if total tokens < 1024 (Anthropic's
-		// minimum)
-		// We'll track whether caching starts in turn 1 or later
-		boolean cachingStarted = usage1.cacheCreationInputTokens() > 0;
-		logger.info("Turn 1 - Caching started: {}", cachingStarted);
-		assertThat(usage1.cacheReadInputTokens()).as("Turn 1 should not read cache (no previous cache)").isEqualTo(0);
-
-		// Turn 2: Follow-up question
-		logger.info("\n=== TURN 2: Follow-up Question ===");
-		conversationHistory.add(new UserMessage("How does quantum entanglement work in this context?"));
-
-		ChatResponse turn2 = this.chatModel.call(new Prompt(conversationHistory, options));
-		assertThat(turn2).isNotNull();
-		String assistant2Response = turn2.getResult().getOutput().getText();
-		conversationHistory.add(turn2.getResult().getOutput());
-
-		AnthropicApi.Usage usage2 = getAnthropicUsage(turn2);
-		assertThat(usage2).isNotNull();
-		logger.info("Turn 2 - User: '{}'", conversationHistory.get(2).getText());
-		logger.info("Turn 2 - Assistant: '{}'",
-				assistant2Response.substring(0, Math.min(100, assistant2Response.length())) + "...");
-		logger.info("Turn 2 - Input tokens: {}", usage2.inputTokens());
-		logger.info("Turn 2 - Cache creation tokens: {}", usage2.cacheCreationInputTokens());
-		logger.info("Turn 2 - Cache read tokens: {}", usage2.cacheReadInputTokens());
-
-		// Second turn: If caching started in turn 1, we should see cache reads
-		// Otherwise, caching might start here if we've accumulated enough tokens
-		if (cachingStarted) {
-			assertThat(usage2.cacheReadInputTokens()).as("Turn 2 should read cache from Turn 1").isGreaterThan(0);
-		}
-		// Update caching status
-		cachingStarted = cachingStarted || usage2.cacheCreationInputTokens() > 0;
-
-		// Turn 3: Another follow-up
-		logger.info("\n=== TURN 3: Deeper Question ===");
-		conversationHistory
-			.add(new UserMessage("Can you give me a practical example of quantum computing application?"));
-
-		ChatResponse turn3 = this.chatModel.call(new Prompt(conversationHistory, options));
-		assertThat(turn3).isNotNull();
-		String assistant3Response = turn3.getResult().getOutput().getText();
-		conversationHistory.add(turn3.getResult().getOutput());
-
-		AnthropicApi.Usage usage3 = getAnthropicUsage(turn3);
-		assertThat(usage3).isNotNull();
-		logger.info("Turn 3 - User: '{}'", conversationHistory.get(4).getText());
-		logger.info("Turn 3 - Assistant: '{}'",
-				assistant3Response.substring(0, Math.min(100, assistant3Response.length())) + "...");
-		logger.info("Turn 3 - Input tokens: {}", usage3.inputTokens());
-		logger.info("Turn 3 - Cache creation tokens: {}", usage3.cacheCreationInputTokens());
-		logger.info("Turn 3 - Cache read tokens: {}", usage3.cacheReadInputTokens());
-
-		// Third turn: Should read cache if caching has started
-		if (cachingStarted) {
-			assertThat(usage3.cacheReadInputTokens()).as("Turn 3 should read cache if caching has started")
-				.isGreaterThan(0);
-		}
-		// Update caching status
-		cachingStarted = cachingStarted || usage3.cacheCreationInputTokens() > 0;
-
-		// Turn 4: Final question
-		logger.info("\n=== TURN 4: Final Question ===");
-		conversationHistory.add(new UserMessage("What are the limitations of current quantum computers?"));
-
-		ChatResponse turn4 = this.chatModel.call(new Prompt(conversationHistory, options));
-		assertThat(turn4).isNotNull();
-		String assistant4Response = turn4.getResult().getOutput().getText();
-		conversationHistory.add(turn4.getResult().getOutput());
-
-		AnthropicApi.Usage usage4 = getAnthropicUsage(turn4);
-		assertThat(usage4).isNotNull();
-		logger.info("Turn 4 - User: '{}'", conversationHistory.get(6).getText());
-		logger.info("Turn 4 - Assistant: '{}'",
-				assistant4Response.substring(0, Math.min(100, assistant4Response.length())) + "...");
-		logger.info("Turn 4 - Input tokens: {}", usage4.inputTokens());
-		logger.info("Turn 4 - Cache creation tokens: {}", usage4.cacheCreationInputTokens());
-		logger.info("Turn 4 - Cache read tokens: {}", usage4.cacheReadInputTokens());
-
-		// Fourth turn: By now we should definitely have caching working
-		assertThat(cachingStarted).as("Caching should have started by turn 4").isTrue();
-		if (cachingStarted) {
-			assertThat(usage4.cacheReadInputTokens()).as("Turn 4 should read cache").isGreaterThan(0);
-		}
-
-		// Summary logging
-		logger.info("\n=== CACHING SUMMARY ===");
-		logger.info("Turn 1 - Created: {}, Read: {}", usage1.cacheCreationInputTokens(), usage1.cacheReadInputTokens());
-		logger.info("Turn 2 - Created: {}, Read: {}", usage2.cacheCreationInputTokens(), usage2.cacheReadInputTokens());
-		logger.info("Turn 3 - Created: {}, Read: {}", usage3.cacheCreationInputTokens(), usage3.cacheReadInputTokens());
-		logger.info("Turn 4 - Created: {}, Read: {}", usage4.cacheCreationInputTokens(), usage4.cacheReadInputTokens());
-
-		// Demonstrate incremental growth pattern
-		logger.info("\n=== CACHE GROWTH PATTERN ===");
-		logger.info("Cache read tokens grew from {} → {} → {} → {}", usage1.cacheReadInputTokens(),
-				usage2.cacheReadInputTokens(), usage3.cacheReadInputTokens(), usage4.cacheReadInputTokens());
-		logger.info("This demonstrates incremental prefix caching: each turn builds on the previous cache");
-
-		// Verify that once caching starts, cache reads continue to grow
-		List<Integer> cacheReads = List.of(usage1.cacheReadInputTokens(), usage2.cacheReadInputTokens(),
-				usage3.cacheReadInputTokens(), usage4.cacheReadInputTokens());
-		int firstNonZeroIndex = -1;
-		for (int i = 0; i < cacheReads.size(); i++) {
-			if (cacheReads.get(i) > 0) {
-				firstNonZeroIndex = i;
-				break;
-			}
-		}
-		if (firstNonZeroIndex >= 0 && firstNonZeroIndex < cacheReads.size() - 1) {
-			// Verify each subsequent turn has cache reads >= previous
-			for (int i = firstNonZeroIndex + 1; i < cacheReads.size(); i++) {
-				assertThat(cacheReads.get(i))
-					.as("Cache reads should grow or stay same once caching starts (turn %d vs turn %d)", i + 1, i)
-					.isGreaterThanOrEqualTo(cacheReads.get(i - 1));
-			}
-		}
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
deleted file mode 100644
index dc8bfbe1fd3..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
+++ /dev/null
@@ -1,804 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-
-import okhttp3.mockwebserver.MockResponse;
-import okhttp3.mockwebserver.MockWebServer;
-import okhttp3.mockwebserver.RecordedRequest;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-import tools.jackson.databind.JsonNode;
-import tools.jackson.databind.json.JsonMapper;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicCacheOptions;
-import org.springframework.ai.anthropic.api.AnthropicCacheStrategy;
-import org.springframework.ai.anthropic.api.AnthropicCacheTtl;
-import org.springframework.ai.chat.client.ChatClient;
-import org.springframework.ai.chat.messages.MessageType;
-import org.springframework.ai.chat.messages.SystemMessage;
-import org.springframework.ai.chat.messages.UserMessage;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.chat.prompt.Prompt;
-import org.springframework.ai.tool.annotation.Tool;
-import org.springframework.ai.tool.method.MethodToolCallback;
-import org.springframework.ai.tool.support.ToolDefinitions;
-import org.springframework.util.ReflectionUtils;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * Mock tests for Anthropic prompt caching functionality with tool calling validation.
- * Tests the wire format and cache control headers without requiring real API calls.
- *
- * @author Mark Pollack
- * @author Austin Dase
- * @since 1.1.0
- */
-class AnthropicPromptCachingMockTest {
-
-	private MockWebServer mockWebServer;
-
-	private AnthropicChatModel chatModel;
-
-	@BeforeEach
-	void setUp() throws IOException {
-		this.mockWebServer = new MockWebServer();
-		this.mockWebServer.start();
-
-		String baseUrl = this.mockWebServer.url("/").toString();
-		AnthropicApi anthropicApi = AnthropicApi.builder().apiKey("test-api-key").baseUrl(baseUrl).build();
-		this.chatModel = AnthropicChatModel.builder().anthropicApi(anthropicApi).build();
-	}
-
-	@AfterEach
-	void tearDown() throws IOException {
-		this.mockWebServer.shutdown();
-	}
-
-	@Test
-	void testSystemOnlyCacheStrategy() throws Exception {
-		// Mock response
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [
-						{
-							"type": "text",
-							"text": "Hello! I understand you want to test caching."
-						}
-					],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"stop_sequence": null,
-					"usage": {
-						"input_tokens": 50,
-						"output_tokens": 20
-					}
-				}
-				""";
-
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		// Create tool callback to test that tools are NOT cached with SYSTEM_ONLY
-		var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
-		MethodToolCallback toolCallback = MethodToolCallback.builder()
-			.toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build())
-			.toolMethod(toolMethod)
-			.build();
-
-		// Test with SYSTEM_ONLY cache strategy
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
-			.toolCallbacks(List.of(toolCallback))
-			.build();
-
-		Prompt prompt = new Prompt(
-				List.of(new SystemMessage("You are a helpful assistant."), new UserMessage("Test message")), options);
-
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify request was made
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-
-		// Parse and validate request body
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-
-		// Verify system message has cache control
-		assertThat(requestBody.has("system")).isTrue();
-		JsonNode systemNode = requestBody.get("system");
-		if (systemNode.isArray()) {
-			JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1);
-			assertThat(lastSystemBlock.has("cache_control")).isTrue();
-			assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
-		}
-
-		// Verify tools exist but DO NOT have cache_control (key difference from
-		// SYSTEM_AND_TOOLS)
-		if (requestBody.has("tools")) {
-			JsonNode toolsArray = requestBody.get("tools");
-			assertThat(toolsArray.isArray()).isTrue();
-			// Verify NO tool has cache_control
-			for (int i = 0; i < toolsArray.size(); i++) {
-				JsonNode tool = toolsArray.get(i);
-				assertThat(tool.has("cache_control")).isFalse();
-			}
-		}
-
-		// Verify response
-		assertThat(response).isNotNull();
-		assertThat(response.getResult().getOutput().getText()).contains("Hello!");
-	}
-
-	@Test
-	void testSystemMinLengthDisablesCaching() throws Exception {
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [ { "type": "text", "text": "ok" } ],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": { "input_tokens": 10, "output_tokens": 2 }
-				}
-				""";
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		AnthropicCacheOptions cacheOptions = AnthropicCacheOptions.builder()
-			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-			.messageTypeMinContentLength(MessageType.SYSTEM, 1000)
-			.build();
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder().cacheOptions(cacheOptions).build();
-		Prompt prompt = new Prompt(List.of(new SystemMessage("short"), new UserMessage("Test message")), options);
-		this.chatModel.call(prompt);
-
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-
-		// Ensure no cache_control present since system content was below min length
-		String req = requestBody.toString();
-		assertThat(req).doesNotContain("\"cache_control\"");
-	}
-
-	@Test
-	void testCustomContentLengthFunctionEnablesCaching() throws Exception {
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [ { "type": "text", "text": "ok" } ],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": { "input_tokens": 10, "output_tokens": 2 }
-				}
-				""";
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		AnthropicCacheOptions cacheOptions = AnthropicCacheOptions.builder()
-			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-			.messageTypeMinContentLength(MessageType.SYSTEM, 1000)
-			.contentLengthFunction(s -> 2000) // force eligibility even for short text
-			.build();
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder().cacheOptions(cacheOptions).build();
-		Prompt prompt = new Prompt(List.of(new SystemMessage("short"), new UserMessage("Test message")), options);
-		this.chatModel.call(prompt);
-
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-		JsonNode systemNode = requestBody.get("system");
-		if (systemNode != null && systemNode.isArray()) {
-			JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1);
-			assertThat(lastSystemBlock.has("cache_control")).isTrue();
-		}
-	}
-
-	@Test
-	void testSystemAndToolsCacheStrategy() throws Exception {
-		// Mock response
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [
-						{
-							"type": "text",
-							"text": "I'll help you with the weather information."
-						}
-					],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": {
-						"input_tokens": 150,
-						"output_tokens": 25
-					}
-				}
-				""";
-
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		// Create tool callback
-		var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
-		MethodToolCallback toolCallback = MethodToolCallback.builder()
-			.toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build())
-			.toolMethod(toolMethod)
-			.build();
-
-		// Test with SYSTEM_AND_TOOLS cache strategy
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build())
-			.toolCallbacks(List.of(toolCallback))
-			.build();
-
-		ChatClient chatClient = ChatClient.create(this.chatModel);
-		String response = chatClient.prompt()
-			.user("What's the weather like in San Francisco?")
-			.options(options)
-			.call()
-			.content();
-
-		// Verify request was made
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-
-		// Parse and validate request body
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-
-		// Verify tools array exists and last tool has cache control
-		assertThat(requestBody.has("tools")).isTrue();
-		JsonNode toolsArray = requestBody.get("tools");
-		assertThat(toolsArray.isArray()).isTrue();
-		assertThat(toolsArray.size()).isGreaterThan(0);
-
-		JsonNode lastTool = toolsArray.get(toolsArray.size() - 1);
-		assertThat(lastTool.has("cache_control")).isTrue();
-		assertThat(lastTool.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
-
-		// Verify system message also has cache control
-		if (requestBody.has("system")) {
-			JsonNode systemNode = requestBody.get("system");
-			if (systemNode.isArray()) {
-				JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1);
-				assertThat(lastSystemBlock.has("cache_control")).isTrue();
-			}
-		}
-
-		// Verify response
-		assertThat(response).contains("weather information");
-	}
-
-	@Test
-	void testConversationHistoryCacheStrategy() throws Exception {
-		// Mock response
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [
-						{
-							"type": "text",
-							"text": "Based on our previous conversation, I can help with that."
-						}
-					],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": {
-						"input_tokens": 200,
-						"output_tokens": 30
-					}
-				}
-				""";
-
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		// Test with CONVERSATION_HISTORY cache strategy
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build())
-			.build();
-
-		// Create a prompt with conversation history
-		Prompt prompt = new Prompt(List.of(new UserMessage("Previous question about weather"),
-				new UserMessage("What about tomorrow's forecast?")), options);
-
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify request was made
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-
-		// Parse and validate request body
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-
-		// Verify messages array exists
-		assertThat(requestBody.has("messages")).isTrue();
-		JsonNode messagesArray = requestBody.get("messages");
-		assertThat(messagesArray.isArray()).isTrue();
-		assertThat(messagesArray.size()).isGreaterThan(1);
-
-		// Verify the last message has cache control (conversation history)
-		if (messagesArray.size() >= 1) {
-			JsonNode lastMessage = messagesArray.get(messagesArray.size() - 1);
-			assertThat(lastMessage.has("content")).isTrue();
-			JsonNode contentArray = lastMessage.get("content");
-			if (contentArray.isArray() && contentArray.size() > 0) {
-				JsonNode lastContentBlock = contentArray.get(contentArray.size() - 1);
-				assertThat(lastContentBlock.has("cache_control")).isTrue();
-				assertThat(lastContentBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
-			}
-		}
-
-		// Verify response
-		assertThat(response).isNotNull();
-		assertThat(response.getResult().getOutput().getText()).contains("previous conversation");
-	}
-
-	@Test
-	void testNoCacheStrategy() throws Exception {
-		// Mock response
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [
-						{
-							"type": "text",
-							"text": "Simple response without caching."
-						}
-					],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": {
-						"input_tokens": 20,
-						"output_tokens": 10
-					}
-				}
-				""";
-
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		// Test with NONE cache strategy (default)
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build())
-			.build();
-
-		Prompt prompt = new Prompt("Simple test message", options);
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify request was made
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-
-		// Parse and validate request body
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-
-		// Verify NO cache_control fields exist anywhere
-		String requestBodyString = requestBody.toString();
-		assertThat(requestBodyString).doesNotContain("cache_control");
-
-		// Verify response
-		assertThat(response).isNotNull();
-		assertThat(response.getResult().getOutput().getText()).contains("Simple response");
-	}
-
-	@Test
-	void testCacheTtlHeader() throws Exception {
-		// Mock response
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [
-						{
-							"type": "text",
-							"text": "Response with 1-hour cache TTL."
-						}
-					],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": {
-						"input_tokens": 30,
-						"output_tokens": 15
-					}
-				}
-				""";
-
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		// Test with 1-hour cache TTL
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.cacheOptions(AnthropicCacheOptions.builder()
-				.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-				.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
-				.build())
-			.build();
-
-		Prompt prompt = new Prompt(
-				List.of(new SystemMessage("You are a helpful assistant."), new UserMessage("Test message")), options);
-
-		this.chatModel.call(prompt);
-
-		// Verify request was made
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-
-		// Verify the beta header is present for 1-hour cache
-		assertThat(recordedRequest.getHeader("anthropic-beta")).contains("extended-cache-ttl-2025-04-11");
-	}
-
-	@Test
-	void testFourBreakpointLimitEnforcement() throws Exception {
-		// Mock response
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [
-						{
-							"type": "text",
-							"text": "Response with maximum cache breakpoints."
-						}
-					],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": {
-						"input_tokens": 500,
-						"output_tokens": 20
-					}
-				}
-				""";
-
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		// Create multiple tools to test breakpoint limits
-		var weatherMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
-		var calculateMethod = ReflectionUtils.findMethod(TestTools.class, "calculate", String.class);
-		var searchMethod = ReflectionUtils.findMethod(TestTools.class, "search", String.class);
-
-		MethodToolCallback weatherTool = MethodToolCallback.builder()
-			.toolDefinition(ToolDefinitions.builder(weatherMethod).description("Get weather information").build())
-			.toolMethod(weatherMethod)
-			.build();
-
-		MethodToolCallback calculateTool = MethodToolCallback.builder()
-			.toolDefinition(ToolDefinitions.builder(calculateMethod).description("Calculate expressions").build())
-			.toolMethod(calculateMethod)
-			.build();
-
-		MethodToolCallback searchTool = MethodToolCallback.builder()
-			.toolDefinition(ToolDefinitions.builder(searchMethod).description("Search for information").build())
-			.toolMethod(searchMethod)
-			.build();
-
-		// Test with SYSTEM_AND_TOOLS strategy and multiple large system messages
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build())
-			.toolCallbacks(List.of(weatherTool, calculateTool, searchTool))
-			.build();
-
-		// Create multiple large system messages and user messages to potentially exceed 4
-		// breakpoints
-		String largeSystemMsg1 = "System message 1: " + "A".repeat(1200);
-		String largeSystemMsg2 = "System message 2: " + "B".repeat(1200);
-		String largeUserMsg1 = "User message 1: " + "C".repeat(1200);
-		String largeUserMsg2 = "User message 2: " + "D".repeat(1200);
-
-		Prompt prompt = new Prompt(List.of(new SystemMessage(largeSystemMsg1), new SystemMessage(largeSystemMsg2),
-				new UserMessage(largeUserMsg1), new UserMessage(largeUserMsg2)), options);
-
-		this.chatModel.call(prompt);
-
-		// Verify request was made
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-
-		// Parse and validate request body
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-
-		// Count cache_control occurrences in the entire request
-		int cacheControlCount = countCacheControlOccurrences(requestBody);
-
-		// Verify we don't exceed Anthropic's 4-breakpoint limit
-		assertThat(cacheControlCount)
-			.withFailMessage("Cache breakpoints should not exceed 4, but found %d", cacheControlCount)
-			.isLessThanOrEqualTo(4);
-	}
-
-	@Test
-	void testWireFormatConsistency() throws Exception {
-		// Mock response
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [
-						{
-							"type": "text",
-							"text": "Response for wire format test."
-						}
-					],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": {
-						"input_tokens": 200,
-						"output_tokens": 15
-					}
-				}
-				""";
-
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		// Test with SYSTEM_ONLY caching strategy
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
-			.build();
-
-		Prompt prompt = new Prompt(
-				List.of(new SystemMessage("You are a helpful assistant."), new UserMessage("Hello!")), options);
-
-		this.chatModel.call(prompt);
-
-		// Verify request was made
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-
-		// Parse and validate request body
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-
-		// Verify that cache_control is included in the wire format for SYSTEM_ONLY
-		// strategy
-		// Anthropic's API will handle token threshold validation
-
-		// For SYSTEM_ONLY caching, system message should be in the "system" field with
-		// cache_control
-		assertThat(requestBody.has("system")).withFailMessage("SYSTEM_ONLY strategy should include system field")
-			.isTrue();
-
-		JsonNode systemNode = requestBody.get("system");
-		if (systemNode.isArray()) {
-			JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1);
-			assertThat(lastSystemBlock.has("cache_control"))
-				.withFailMessage("SYSTEM_ONLY strategy should include cache_control in wire format")
-				.isTrue();
-			assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
-		}
-		else if (systemNode.isTextual()) {
-			// Simple text system message should still have cache_control applied at the
-			// message level
-			// Check if there's a cache_control field at the system level or in a wrapper
-			assertThat(requestBody.toString())
-				.withFailMessage("SYSTEM_ONLY strategy should include cache_control in wire format")
-				.contains("cache_control");
-		}
-	}
-
-	@Test
-	void testComplexMultiBreakpointScenario() throws Exception {
-		// Mock response
-		String mockResponse = """
-				{
-					"id": "msg_test123",
-					"type": "message",
-					"role": "assistant",
-					"content": [
-						{
-							"type": "text",
-							"text": "Response for complex multi-breakpoint scenario."
-						}
-					],
-					"model": "claude-haiku-4-5",
-					"stop_reason": "end_turn",
-					"usage": {
-						"input_tokens": 800,
-						"output_tokens": 25
-					}
-				}
-				""";
-
-		this.mockWebServer
-			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
-
-		// Create tools for complex scenario
-		var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
-		MethodToolCallback toolCallback = MethodToolCallback.builder()
-			.toolDefinition(ToolDefinitions.builder(toolMethod).description("Complex weather tool").build())
-			.toolMethod(toolMethod)
-			.build();
-
-		// Test SYSTEM_AND_TOOLS with large content and conversation history
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build())
-			.toolCallbacks(List.of(toolCallback))
-			.build();
-
-		// Create large system message (should get cached)
-		String largeSystemMessage = "System: You are a weather assistant. " + "X".repeat(1200);
-
-		// Create conversation with multiple user messages (history scenario)
-		String userMessage1 = "Previous question about weather in NYC " + "Y".repeat(1200);
-		String userMessage2 = "Follow-up question about tomorrow's forecast " + "Z".repeat(1200);
-		String currentUserMessage = "What about this weekend?";
-
-		Prompt prompt = new Prompt(List.of(new SystemMessage(largeSystemMessage), new UserMessage(userMessage1),
-				new UserMessage(userMessage2), new UserMessage(currentUserMessage)), options);
-
-		this.chatModel.call(prompt);
-
-		// Verify request was made
-		RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
-		assertThat(recordedRequest).isNotNull();
-
-		// Parse and validate request body
-		JsonNode requestBody = JsonMapper.shared().readTree(recordedRequest.getBody().readUtf8());
-
-		// Verify system message has cache control (SYSTEM_AND_TOOLS strategy)
-		assertThat(requestBody.has("system")).isTrue();
-		JsonNode systemNode = requestBody.get("system");
-		if (systemNode.isArray()) {
-			JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1);
-			assertThat(lastSystemBlock.has("cache_control")).isTrue();
-		}
-
-		// Verify tools have cache control (SYSTEM_AND_TOOLS strategy)
-		assertThat(requestBody.has("tools")).isTrue();
-		JsonNode toolsArray = requestBody.get("tools");
-		if (toolsArray.isArray() && toolsArray.size() > 0) {
-			JsonNode lastTool = toolsArray.get(toolsArray.size() - 1);
-			assertThat(lastTool.has("cache_control")).isTrue();
-		}
-
-		// Verify proper ordering and cache control placement
-		int cacheControlCount = countCacheControlOccurrences(requestBody);
-		assertThat(cacheControlCount)
-			.withFailMessage("Complex scenario should not exceed 4 cache breakpoints, found %d", cacheControlCount)
-			.isLessThanOrEqualTo(4);
-
-		// Verify cache_control is only on the LAST blocks of each section (system, tools)
-		// This ensures proper breakpoint placement according to Anthropic's requirements
-		verifyCacheControlPlacement(requestBody);
-	}
-
-	/**
-	 * Helper method to count cache_control occurrences in the request JSON.
-	 */
-	private int countCacheControlOccurrences(JsonNode node) {
-		int count = 0;
-		if (node.isObject()) {
-			if (node.has("cache_control")) {
-				count++;
-			}
-			for (JsonNode child : node.values()) {
-				count += countCacheControlOccurrences(child);
-			}
-		}
-		else if (node.isArray()) {
-			for (JsonNode child : node) {
-				count += countCacheControlOccurrences(child);
-			}
-		}
-		return count;
-	}
-
-	/**
-	 * Helper method to verify cache_control is only placed on the last blocks of each
-	 * section.
-	 */
-	private void verifyCacheControlPlacement(JsonNode requestBody) {
-		// Verify system cache control is only on the last system block
-		if (requestBody.has("system")) {
-			JsonNode systemNode = requestBody.get("system");
-			if (systemNode.isArray()) {
-				for (int i = 0; i < systemNode.size() - 1; i++) {
-					JsonNode systemBlock = systemNode.get(i);
-					assertThat(systemBlock.has("cache_control"))
-						.withFailMessage("Only the last system block should have cache_control, but block %d has it", i)
-						.isFalse();
-				}
-			}
-		}
-
-		// Verify tools cache control is only on the last tool
-		if (requestBody.has("tools")) {
-			JsonNode toolsArray = requestBody.get("tools");
-			if (toolsArray.isArray()) {
-				for (int i = 0; i < toolsArray.size() - 1; i++) {
-					JsonNode tool = toolsArray.get(i);
-					assertThat(tool.has("cache_control"))
-						.withFailMessage("Only the last tool should have cache_control, but tool %d has it", i)
-						.isFalse();
-				}
-			}
-		}
-
-		// Verify messages cache control is only on the last content block of the
-		// appropriate message
-		if (requestBody.has("messages")) {
-			JsonNode messagesArray = requestBody.get("messages");
-			if (messagesArray.isArray()) {
-				// For conversation history caching, only second-to-last message should
-				// have cache control
-				for (int i = 0; i < messagesArray.size(); i++) {
-					JsonNode message = messagesArray.get(i);
-					if (message.has("content") && message.get("content").isArray()) {
-						JsonNode contentArray = message.get("content");
-						for (int j = 0; j < contentArray.size() - 1; j++) {
-							JsonNode contentBlock = contentArray.get(j);
-							if (i != messagesArray.size() - 2 || j != contentArray.size() - 1) {
-								// Only the last content block of the second-to-last
-								// message should have cache_control
-								assertThat(contentBlock.has("cache_control"))
-									.withFailMessage(
-											"Unexpected cache_control placement in message %d, content block %d", i, j)
-									.isFalse();
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-
-	/**
-	 * Test tools class for mock testing.
-	 */
-	public static class TestTools {
-
-		@Tool(description = "Get weather information for a location")
-		public static String getWeather(String location) {
-			return "Weather in " + location + " is sunny, 22°C";
-		}
-
-		@Tool(description = "Calculate mathematical expressions")
-		public static String calculate(String expression) {
-			return "Result: 42";
-		}
-
-		@Tool(description = "Search for information")
-		public static String search(String query) {
-			return "Search results for: " + query;
-		}
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicSkillsIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicSkillsIT.java
index 2c2c3c7c075..aeb69496fc5 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicSkillsIT.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicSkillsIT.java
@@ -21,14 +21,16 @@
 import java.nio.file.Path;
 import java.util.List;
 
+import com.anthropic.client.AnthropicClient;
+import com.anthropic.models.messages.Model;
+import com.anthropic.models.messages.ToolChoice;
+import com.anthropic.models.messages.ToolChoiceAny;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
 import org.junit.jupiter.api.io.TempDir;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicSkill;
 import org.springframework.ai.chat.messages.UserMessage;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.prompt.Prompt;
@@ -41,7 +43,7 @@
 import static org.assertj.core.api.Assertions.assertThat;
 
 /**
- * Integration tests for Anthropic Skills API support.
+ * Integration tests for Anthropic Skills API support via the Java SDK.
  *
  * @author Soby Chacko
  * @since 2.0.0
@@ -56,214 +58,69 @@ class AnthropicSkillsIT {
 	private AnthropicChatModel chatModel;
 
 	@Autowired
-	private AnthropicApi anthropicApi;
+	private AnthropicClient anthropicClient;
 
 	@Test
 	void shouldGenerateExcelWithXlsxSkill(@TempDir Path tempDir) throws IOException {
-		// Create a prompt requesting Excel generation
-		// Use explicit language to trigger skill execution
 		UserMessage userMessage = new UserMessage(
-				"Please create an Excel file (.xlsx) with 3 columns: Name, Age, City. Add 5 sample rows of data. "
-						+ "Generate the actual file using the xlsx skill.");
+				"Please create an Excel file (.xlsx) with 3 columns: Name, Age, City. "
+						+ "Add 5 sample rows of data. Generate the actual file using the xlsx skill.");
 
 		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
+			.model(Model.CLAUDE_SONNET_4_5)
 			.maxTokens(4096)
 			.skill(AnthropicSkill.XLSX)
-			.toolChoice(new AnthropicApi.ToolChoiceAny())
+			.toolChoice(ToolChoice.ofAny(ToolChoiceAny.builder().build()))
 			.internalToolExecutionEnabled(false)
 			.build();
 
 		Prompt prompt = new Prompt(List.of(userMessage), options);
-
-		// Call the model
 		ChatResponse response = this.chatModel.call(prompt);
 
-		// Verify response exists and is not empty
 		assertThat(response).isNotNull();
 		assertThat(response.getResults()).isNotEmpty();
 		String responseText = response.getResult().getOutput().getText();
 		assertThat(responseText).as("Response text should not be blank").isNotBlank();
-
-		// Log the response for debugging
 		logger.info("XLSX Skill Response: {}", responseText);
 
-		// Log metadata for debugging
-		if (response.getMetadata() != null) {
-			logger.info("Response Metadata: {}", response.getMetadata());
-		}
-
-		// Verify the response mentions Excel/spreadsheet creation
-		// The exact content may vary, but it should reference the created file
 		assertThat(responseText.toLowerCase()).as("Response should mention spreadsheet or Excel")
 			.containsAnyOf("spreadsheet", "excel", "xlsx", "created", "file");
 
-		// Extract file IDs from the response
 		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
 		assertThat(fileIds).as("Skills response should contain at least one file ID").isNotEmpty();
-
 		logger.info("Extracted {} file ID(s): {}", fileIds.size(), fileIds);
 
-		// Download all files
-		List<Path> downloadedFiles = AnthropicSkillsResponseHelper.downloadAllFiles(response, this.anthropicApi,
+		List<Path> downloadedFiles = AnthropicSkillsResponseHelper.downloadAllFiles(response, this.anthropicClient,
 				tempDir);
 		assertThat(downloadedFiles).as("Should download at least one file").isNotEmpty();
 
-		// Verify files exist and have content
 		for (Path filePath : downloadedFiles) {
 			assertThat(filePath).exists();
 			assertThat(Files.size(filePath)).as("Downloaded file should not be empty").isGreaterThan(0);
 			logger.info("Downloaded file: {} ({} bytes)", filePath.getFileName(), Files.size(filePath));
 		}
 
-		// Verify at least one Excel file was created
 		boolean hasXlsxFile = downloadedFiles.stream()
 			.anyMatch(path -> path.toString().toLowerCase().endsWith(".xlsx"));
 		assertThat(hasXlsxFile).as("At least one .xlsx file should be downloaded").isTrue();
 	}
 
-	@Test
-	void shouldGeneratePowerPointWithPptxSkill(@TempDir Path tempDir) throws IOException {
-		// Create a prompt requesting PowerPoint generation
-		// Use explicit language to trigger skill execution
-		UserMessage userMessage = new UserMessage(
-				"Please create a PowerPoint presentation file (.pptx) about Spring AI with 3 slides: "
-						+ "Introduction, Features, and Conclusion. Generate the actual file using the pptx skill.");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(4096)
-			.skill(AnthropicSkill.PPTX)
-			.toolChoice(new AnthropicApi.ToolChoiceAny())
-			.internalToolExecutionEnabled(false)
-			.build();
-
-		Prompt prompt = new Prompt(List.of(userMessage), options);
-
-		// Call the model
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify response exists and is not empty
-		assertThat(response).isNotNull();
-		assertThat(response.getResults()).isNotEmpty();
-		String responseText = response.getResult().getOutput().getText();
-		assertThat(responseText).as("Response text should not be blank").isNotBlank();
-
-		// Log the response for debugging
-		logger.info("PPTX Skill Response: {}", responseText);
-
-		// Verify the response mentions PowerPoint/presentation creation
-		assertThat(responseText.toLowerCase()).as("Response should mention presentation or PowerPoint")
-			.containsAnyOf("presentation", "powerpoint", "pptx", "slide", "created", "file");
-
-		// Extract file IDs from the response
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-		assertThat(fileIds).as("Skills response should contain at least one file ID").isNotEmpty();
-
-		logger.info("Extracted {} file ID(s): {}", fileIds.size(), fileIds);
-
-		// Download all files
-		List<Path> downloadedFiles = AnthropicSkillsResponseHelper.downloadAllFiles(response, this.anthropicApi,
-				tempDir);
-		assertThat(downloadedFiles).as("Should download at least one file").isNotEmpty();
-
-		// Verify files exist and have content
-		for (Path filePath : downloadedFiles) {
-			assertThat(filePath).exists();
-			assertThat(Files.size(filePath)).as("Downloaded file should not be empty").isGreaterThan(0);
-			logger.info("Downloaded file: {} ({} bytes)", filePath.getFileName(), Files.size(filePath));
-		}
-
-		// Verify at least one PowerPoint file was created
-		boolean hasPptxFile = downloadedFiles.stream()
-			.anyMatch(path -> path.toString().toLowerCase().endsWith(".pptx"));
-		assertThat(hasPptxFile).as("At least one .pptx file should be downloaded").isTrue();
-	}
-
-	@Test
-	void shouldUseMultipleSkills(@TempDir Path tempDir) throws IOException {
-		// Create a prompt that could use multiple skills
-		// Use explicit language to trigger skill execution
-		UserMessage userMessage = new UserMessage(
-				"Please create two files: 1) An Excel file (.xlsx) with sample sales data (use xlsx skill), "
-						+ "and 2) A PowerPoint presentation file (.pptx) summarizing the data (use pptx skill). "
-						+ "Generate the actual files.");
-
-		AnthropicChatOptions options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-			.maxTokens(4096)
-			.skill(AnthropicSkill.XLSX)
-			.skill(AnthropicSkill.PPTX)
-			.toolChoice(new AnthropicApi.ToolChoiceAny())
-			.internalToolExecutionEnabled(false)
-			.build();
-
-		Prompt prompt = new Prompt(List.of(userMessage), options);
-
-		// Call the model
-		ChatResponse response = this.chatModel.call(prompt);
-
-		// Verify response exists and is not empty
-		assertThat(response).isNotNull();
-		assertThat(response.getResults()).isNotEmpty();
-		String responseText = response.getResult().getOutput().getText();
-		assertThat(responseText).as("Response text should not be blank").isNotBlank();
-
-		// Log the response for debugging
-		logger.info("Multiple Skills Response: {}", responseText);
-
-		// Verify the response mentions document creation
-		assertThat(responseText.toLowerCase()).as("Response should mention file creation")
-			.containsAnyOf("spreadsheet", "presentation", "created", "file", "xlsx", "pptx");
-
-		// Extract file IDs from the response
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-		assertThat(fileIds).as("Skills response should contain at least one file ID").isNotEmpty();
-
-		logger.info("Extracted {} file ID(s): {}", fileIds.size(), fileIds);
-
-		// Download all files
-		List<Path> downloadedFiles = AnthropicSkillsResponseHelper.downloadAllFiles(response, this.anthropicApi,
-				tempDir);
-		assertThat(downloadedFiles).as("Should download at least one file").isNotEmpty();
-		assertThat(downloadedFiles.size()).as("Should download multiple files").isGreaterThanOrEqualTo(2);
-
-		// Verify files exist and have content
-		for (Path filePath : downloadedFiles) {
-			assertThat(filePath).exists();
-			assertThat(Files.size(filePath)).as("Downloaded file should not be empty").isGreaterThan(0);
-			logger.info("Downloaded file: {} ({} bytes)", filePath.getFileName(), Files.size(filePath));
-		}
-
-		// Verify both file types were created
-		boolean hasXlsxFile = downloadedFiles.stream()
-			.anyMatch(path -> path.toString().toLowerCase().endsWith(".xlsx"));
-		boolean hasPptxFile = downloadedFiles.stream()
-			.anyMatch(path -> path.toString().toLowerCase().endsWith(".pptx"));
-
-		assertThat(hasXlsxFile || hasPptxFile).as("At least one .xlsx or .pptx file should be downloaded").isTrue();
-	}
-
 	@SpringBootConfiguration
 	public static class Config {
 
 		@Bean
-		public AnthropicApi anthropicApi() {
-			return AnthropicApi.builder().apiKey(getApiKey()).build();
-		}
-
-		private String getApiKey() {
+		public AnthropicClient anthropicClient() {
 			String apiKey = System.getenv("ANTHROPIC_API_KEY");
 			if (!StringUtils.hasText(apiKey)) {
 				throw new IllegalArgumentException(
 						"You must provide an API key. Put it in an environment variable under the name ANTHROPIC_API_KEY");
 			}
-			return apiKey;
+			return AnthropicSetup.setupSyncClient(null, apiKey, null, null, null, null);
 		}
 
 		@Bean
-		public AnthropicChatModel anthropicChatModel(AnthropicApi api) {
-			return AnthropicChatModel.builder().anthropicApi(api).build();
+		public AnthropicChatModel anthropicChatModel(AnthropicClient client) {
+			return AnthropicChatModel.builder().anthropicClient(client).build();
 		}
 
 	}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicSkillsResponseHelperTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicSkillsResponseHelperTests.java
index 67f5e7b0a88..e0410ccf237 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicSkillsResponseHelperTests.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicSkillsResponseHelperTests.java
@@ -17,218 +17,127 @@
 package org.springframework.ai.anthropic;
 
 import java.util.List;
+import java.util.Optional;
 
+import com.anthropic.models.messages.Container;
+import com.anthropic.models.messages.ContainerUploadBlock;
+import com.anthropic.models.messages.ContentBlock;
+import com.anthropic.models.messages.Message;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.junit.jupiter.MockitoExtension;
 
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
 import org.springframework.ai.chat.metadata.ChatResponseMetadata;
 import org.springframework.ai.chat.model.ChatResponse;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.BDDMockito.given;
+import static org.mockito.Mockito.mock;
 
 /**
  * Unit tests for {@link AnthropicSkillsResponseHelper}.
  *
  * @author Soby Chacko
- * @since 2.0.0
  */
+@ExtendWith(MockitoExtension.class)
 class AnthropicSkillsResponseHelperTests {
 
 	@Test
-	void shouldReturnEmptyListForNullResponse() {
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(null);
-		assertThat(fileIds).isEmpty();
+	void extractFileIdsReturnsEmptyForNullResponse() {
+		assertThat(AnthropicSkillsResponseHelper.extractFileIds(null)).isEmpty();
 	}
 
 	@Test
-	void shouldReturnEmptyListForResponseWithoutMetadata() {
-		ChatResponse response = new ChatResponse(List.of());
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-		assertThat(fileIds).isEmpty();
+	void extractFileIdsReturnsEmptyForNullMetadata() {
+		ChatResponse response = mock(ChatResponse.class);
+		given(response.getMetadata()).willReturn(null);
+		assertThat(AnthropicSkillsResponseHelper.extractFileIds(response)).isEmpty();
 	}
 
 	@Test
-	void shouldReturnEmptyListWhenNoFileContentBlocks() {
-		// Create a response with text content but no files
-		ContentBlock textBlock = new ContentBlock("Sample text response");
-		ChatCompletionResponse apiResponse = new ChatCompletionResponse("msg_123", "message", null, List.of(textBlock),
-				"claude-sonnet-4-5", null, null, null, null);
-
-		ChatResponseMetadata metadata = ChatResponseMetadata.builder()
-			.keyValue("anthropic-response", apiResponse)
-			.build();
-
-		ChatResponse response = new ChatResponse(List.of(), metadata);
-
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-		assertThat(fileIds).isEmpty();
-	}
-
-	@Test
-	void shouldExtractSingleFileId() {
-		// Create a file content block
-		ContentBlock fileBlock = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, null, "file_abc123", "report.xlsx");
-
-		ChatCompletionResponse apiResponse = new ChatCompletionResponse("msg_123", "message", null, List.of(fileBlock),
-				"claude-sonnet-4-5", null, null, null, null);
-
-		ChatResponseMetadata metadata = ChatResponseMetadata.builder()
-			.keyValue("anthropic-response", apiResponse)
-			.build();
-
-		ChatResponse response = new ChatResponse(List.of(), metadata);
-
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-
-		assertThat(fileIds).hasSize(1);
-		assertThat(fileIds).containsExactly("file_abc123");
-	}
-
-	@Test
-	void shouldExtractMultipleFileIds() {
-		// Create multiple file content blocks
-		ContentBlock file1 = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, "file_123", "report.xlsx");
-
-		ContentBlock file2 = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, "file_456", "presentation.pptx");
-
-		ContentBlock file3 = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, "file_789", "document.docx");
-
-		ChatCompletionResponse apiResponse = new ChatCompletionResponse("msg_123", "message", null,
-				List.of(file1, file2, file3), "claude-sonnet-4-5", null, null, null, null);
-
-		ChatResponseMetadata metadata = ChatResponseMetadata.builder()
-			.keyValue("anthropic-response", apiResponse)
-			.build();
-
-		ChatResponse response = new ChatResponse(List.of(), metadata);
-
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-
-		assertThat(fileIds).hasSize(3);
-		assertThat(fileIds).containsExactly("file_123", "file_456", "file_789");
+	void extractFileIdsReturnsEmptyForNonMessageMetadata() {
+		ChatResponseMetadata metadata = mock(ChatResponseMetadata.class);
+		given(metadata.get("anthropic-response")).willReturn("not a message");
+		ChatResponse response = mock(ChatResponse.class);
+		given(response.getMetadata()).willReturn(metadata);
+		assertThat(AnthropicSkillsResponseHelper.extractFileIds(response)).isEmpty();
 	}
 
 	@Test
-	void shouldExtractFileIdsFromMixedContent() {
-		// Mix of text and file content blocks
-		ContentBlock textBlock = new ContentBlock("I've created the files you requested");
-
-		ContentBlock file1 = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, "file_excel", "data.xlsx");
+	void extractFileIdsFindsContainerUploadBlocks() {
+		ContainerUploadBlock uploadBlock1 = mock(ContainerUploadBlock.class);
+		given(uploadBlock1.fileId()).willReturn("file-abc-123");
+		ContainerUploadBlock uploadBlock2 = mock(ContainerUploadBlock.class);
+		given(uploadBlock2.fileId()).willReturn("file-def-456");
 
-		ContentBlock file2 = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, "file_pdf", "summary.pdf");
+		ContentBlock block1 = mock(ContentBlock.class);
+		given(block1.isContainerUpload()).willReturn(true);
+		given(block1.asContainerUpload()).willReturn(uploadBlock1);
 
-		ChatCompletionResponse apiResponse = new ChatCompletionResponse("msg_123", "message", null,
-				List.of(textBlock, file1, file2), "claude-sonnet-4-5", null, null, null, null);
+		ContentBlock block2 = mock(ContentBlock.class);
+		given(block2.isContainerUpload()).willReturn(true);
+		given(block2.asContainerUpload()).willReturn(uploadBlock2);
 
-		ChatResponseMetadata metadata = ChatResponseMetadata.builder()
-			.keyValue("anthropic-response", apiResponse)
-			.build();
+		Message message = mock(Message.class);
+		given(message.content()).willReturn(List.of(block1, block2));
 
-		ChatResponse response = new ChatResponse(List.of(), metadata);
+		ChatResponseMetadata metadata = mock(ChatResponseMetadata.class);
+		given(metadata.get("anthropic-response")).willReturn(message);
+		ChatResponse response = mock(ChatResponse.class);
+		given(response.getMetadata()).willReturn(metadata);
 
 		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-
-		assertThat(fileIds).hasSize(2);
-		assertThat(fileIds).containsExactly("file_excel", "file_pdf");
+		assertThat(fileIds).containsExactly("file-abc-123", "file-def-456");
 	}
 
 	@Test
-	void shouldReturnNullContainerIdForNullResponse() {
-		String containerId = AnthropicSkillsResponseHelper.extractContainerId(null);
-		assertThat(containerId).isNull();
-	}
+	void extractFileIdsSkipsNonContainerUploadBlocks() {
+		ContentBlock textBlock = mock(ContentBlock.class);
+		given(textBlock.isContainerUpload()).willReturn(false);
 
-	@Test
-	void shouldReturnNullContainerIdForResponseWithoutMetadata() {
-		ChatResponse response = new ChatResponse(List.of());
-		String containerId = AnthropicSkillsResponseHelper.extractContainerId(response);
-		assertThat(containerId).isNull();
-	}
-
-	@Test
-	void shouldReturnNullContainerIdWhenNotPresent() {
-		ContentBlock textBlock = new ContentBlock("Response without container");
-		ChatCompletionResponse apiResponse = new ChatCompletionResponse("msg_123", "message", null, List.of(textBlock),
-				"claude-sonnet-4-5", null, null, null, null);
+		Message message = mock(Message.class);
+		given(message.content()).willReturn(List.of(textBlock));
 
-		ChatResponseMetadata metadata = ChatResponseMetadata.builder()
-			.keyValue("anthropic-response", apiResponse)
-			.build();
+		ChatResponseMetadata metadata = mock(ChatResponseMetadata.class);
+		given(metadata.get("anthropic-response")).willReturn(message);
+		ChatResponse response = mock(ChatResponse.class);
+		given(response.getMetadata()).willReturn(metadata);
 
-		ChatResponse response = new ChatResponse(List.of(), metadata);
-
-		String containerId = AnthropicSkillsResponseHelper.extractContainerId(response);
-		assertThat(containerId).isNull();
+		assertThat(AnthropicSkillsResponseHelper.extractFileIds(response)).isEmpty();
 	}
 
 	@Test
-	void shouldExtractContainerId() {
-		ContentBlock textBlock = new ContentBlock("Response with container");
-		ChatCompletionResponse.Container container = new ChatCompletionResponse.Container("container_xyz789");
-		ChatCompletionResponse apiResponse = new ChatCompletionResponse("msg_123", "message", null, List.of(textBlock),
-				"claude-sonnet-4-5", null, null, null, container);
-
-		ChatResponseMetadata metadata = ChatResponseMetadata.builder()
-			.keyValue("anthropic-response", apiResponse)
-			.build();
-
-		ChatResponse response = new ChatResponse(List.of(), metadata);
-
-		String containerId = AnthropicSkillsResponseHelper.extractContainerId(response);
-		assertThat(containerId).isEqualTo("container_xyz789");
+	void extractContainerIdReturnsNullForNullResponse() {
+		assertThat(AnthropicSkillsResponseHelper.extractContainerId(null)).isNull();
 	}
 
 	@Test
-	void shouldHandleMultipleFileBlocks() {
-		// Response with multiple file blocks in content
-		ContentBlock file1 = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, "file_1", "file1.xlsx");
-		ContentBlock file2 = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, "file_2", "file2.pptx");
-		ChatCompletionResponse apiResponse = new ChatCompletionResponse("msg_1", "message", null, List.of(file1, file2),
-				"claude-sonnet-4-5", null, null, null, null);
+	void extractContainerIdReturnsIdWhenPresent() {
+		Container container = mock(Container.class);
+		given(container.id()).willReturn("cntr-abc-123");
 
-		ChatResponseMetadata metadata = ChatResponseMetadata.builder()
-			.keyValue("anthropic-response", apiResponse)
-			.build();
+		Message message = mock(Message.class);
+		given(message.container()).willReturn(Optional.of(container));
 
-		ChatResponse response = new ChatResponse(List.of(), metadata);
+		ChatResponseMetadata metadata = mock(ChatResponseMetadata.class);
+		given(metadata.get("anthropic-response")).willReturn(message);
+		ChatResponse response = mock(ChatResponse.class);
+		given(response.getMetadata()).willReturn(metadata);
 
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-		assertThat(fileIds).hasSize(2);
-		assertThat(fileIds).containsExactly("file_1", "file_2");
+		assertThat(AnthropicSkillsResponseHelper.extractContainerId(response)).isEqualTo("cntr-abc-123");
 	}
 
 	@Test
-	void shouldIgnoreFileBlocksWithoutFileId() {
-		// File block with null fileId should be ignored
-		ContentBlock invalidFileBlock = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, null, null, null, "file.xlsx");
+	void extractContainerIdReturnsNullWhenNoContainer() {
+		Message message = mock(Message.class);
+		given(message.container()).willReturn(Optional.empty());
 
-		ContentBlock validFileBlock = new ContentBlock(ContentBlock.Type.FILE, null, null, null, null, null, null, null,
-				null, null, null, null, null, null, null, null, "file_valid", "valid.xlsx");
-
-		ChatCompletionResponse apiResponse = new ChatCompletionResponse("msg_123", "message", null,
-				List.of(invalidFileBlock, validFileBlock), "claude-sonnet-4-5", null, null, null, null);
-
-		ChatResponseMetadata metadata = ChatResponseMetadata.builder()
-			.keyValue("anthropic-response", apiResponse)
-			.build();
-
-		ChatResponse response = new ChatResponse(List.of(), metadata);
-
-		List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
+		ChatResponseMetadata metadata = mock(ChatResponseMetadata.class);
+		given(metadata.get("anthropic-response")).willReturn(message);
+		ChatResponse response = mock(ChatResponse.class);
+		given(response.getMetadata()).willReturn(metadata);
 
-		// Should only extract the valid file ID
-		assertThat(fileIds).hasSize(1);
-		assertThat(fileIds).containsExactly("file_valid");
+		assertThat(AnthropicSkillsResponseHelper.extractContainerId(response)).isNull();
 	}
 
 }
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicTestConfiguration.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicTestConfiguration.java
index 568dd939ca6..e652ba2c22e 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicTestConfiguration.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicTestConfiguration.java
@@ -16,31 +16,20 @@
 
 package org.springframework.ai.anthropic;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
 import org.springframework.boot.SpringBootConfiguration;
 import org.springframework.context.annotation.Bean;
-import org.springframework.util.StringUtils;
 
+/**
+ * Context configuration for Anthropic Java SDK tests.
+ *
+ * @author Soby Chacko
+ */
 @SpringBootConfiguration
 public class AnthropicTestConfiguration {
 
 	@Bean
-	public AnthropicApi anthropicApi() {
-		return AnthropicApi.builder().apiKey(getApiKey()).build();
-	}
-
-	private String getApiKey() {
-		String apiKey = System.getenv("ANTHROPIC_API_KEY");
-		if (!StringUtils.hasText(apiKey)) {
-			throw new IllegalArgumentException(
-					"You must provide an API key.  Put it in an environment variable under the name ANTHROPIC_API_KEY");
-		}
-		return apiKey;
-	}
-
-	@Bean
-	public AnthropicChatModel anthropicChatModel(AnthropicApi api) {
-		return AnthropicChatModel.builder().anthropicApi(api).build();
+	public AnthropicChatModel anthropicChatModel() {
+		return AnthropicChatModel.builder().build();
 	}
 
 }
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/CacheEligibilityResolverTests.java
similarity index 68%
rename from models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java
rename to models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/CacheEligibilityResolverTests.java
index 1e277453c20..ec55b503b02 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/CacheEligibilityResolverTests.java
@@ -14,14 +14,11 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic.api.utils;
+package org.springframework.ai.anthropic;
 
+import com.anthropic.models.messages.CacheControlEphemeral;
 import org.junit.jupiter.api.Test;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicCacheOptions;
-import org.springframework.ai.anthropic.api.AnthropicCacheStrategy;
-import org.springframework.ai.anthropic.api.AnthropicCacheTtl;
 import org.springframework.ai.chat.messages.MessageType;
 
 import static org.assertj.core.api.Assertions.assertThat;
@@ -29,7 +26,6 @@
 /**
  * Tests for {@link CacheEligibilityResolver}.
  *
- * @author Austin Dase
  * @author Soby Chacko
  */
 class CacheEligibilityResolverTests {
@@ -55,17 +51,16 @@ void systemCachingRespectsMinLength() {
 		assertThat(resolver.resolve(MessageType.SYSTEM, "short")).isNull();
 
 		// Above min length -> cache control with default TTL
-		AnthropicApi.ChatCompletionRequest.CacheControl cc = resolver.resolve(MessageType.SYSTEM, "01234567890");
+		CacheControlEphemeral cc = resolver.resolve(MessageType.SYSTEM, "01234567890");
 		assertThat(cc).isNotNull();
-		assertThat(cc.type()).isEqualTo("ephemeral");
-		assertThat(cc.ttl()).isEqualTo(AnthropicCacheTtl.FIVE_MINUTES.getValue());
+		// The default TTL maps to the SDK's five-minute constant.
+		assertThat(cc.ttl()).contains(CacheControlEphemeral.Ttl.TTL_5M);
 	}
 
 	@Test
 	void emptyTextShouldNotBeCachedEvenIfMinIsZero() {
 		AnthropicCacheOptions options = AnthropicCacheOptions.builder()
 			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-			// default min content length is 0
 			.build();
 		CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options);
 		assertThat(resolver.resolve(MessageType.SYSTEM, "")).isNull();
@@ -79,7 +74,7 @@ void toolCacheControlRespectsStrategy() {
 			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build());
 		assertThat(none.resolveToolCacheControl()).isNull();
 
-		// SYSTEM_ONLY -> no explicit tool caching (tools cached implicitly via hierarchy)
+		// SYSTEM_ONLY -> no explicit tool caching
 		CacheEligibilityResolver sys = CacheEligibilityResolver.from(AnthropicCacheOptions.builder()
 			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
 			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
@@ -99,9 +94,10 @@ void toolCacheControlRespectsStrategy() {
 			.strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
 			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
 			.build());
-		var cc = sysAndTools.resolveToolCacheControl();
+		CacheControlEphemeral cc = sysAndTools.resolveToolCacheControl();
 		assertThat(cc).isNotNull();
-		assertThat(cc.ttl()).isEqualTo(AnthropicCacheTtl.ONE_HOUR.getValue());
+		// Tool definitions pick up the TTL configured for SYSTEM messages.
+		assertThat(cc.ttl()).contains(CacheControlEphemeral.Ttl.TTL_1H);
 
 		// CONVERSATION_HISTORY -> tool caching enabled
 		CacheEligibilityResolver history = CacheEligibilityResolver
@@ -117,27 +113,14 @@ void toolsOnlyStrategyBehavior() {
 			.build();
 		CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options);
 
-		// Caching is enabled
 		assertThat(resolver.isCachingEnabled()).isTrue();
-
-		// System messages should NOT be cached
-		assertThat(resolver.resolve(MessageType.SYSTEM, "Large system prompt with plenty of content"))
-			.as("System messages should not be cached with TOOLS_ONLY strategy")
-			.isNull();
-
-		// User messages should NOT be cached
+		assertThat(resolver.resolve(MessageType.SYSTEM, "Large system prompt with plenty of content")).isNull();
 		assertThat(resolver.resolve(MessageType.USER, "User message content")).isNull();
-
-		// Assistant messages should NOT be cached
 		assertThat(resolver.resolve(MessageType.ASSISTANT, "Assistant message content")).isNull();
-
-		// Tool messages should NOT be cached
 		assertThat(resolver.resolve(MessageType.TOOL, "Tool result content")).isNull();
 
-		// Tool definitions SHOULD be cached
-		AnthropicApi.ChatCompletionRequest.CacheControl toolCache = resolver.resolveToolCacheControl();
-		assertThat(toolCache).as("Tool definitions should be cached with TOOLS_ONLY strategy").isNotNull();
-		assertThat(toolCache.type()).isEqualTo("ephemeral");
+		CacheControlEphemeral toolCache = resolver.resolveToolCacheControl();
+		assertThat(toolCache).isNotNull();
 	}
 
 	@Test
@@ -148,24 +131,23 @@ void breakpointCountForEachStrategy() {
 		assertThat(none.resolveToolCacheControl()).isNull();
 		assertThat(none.resolve(MessageType.SYSTEM, "content")).isNull();
 
-		// SYSTEM_ONLY: 1 breakpoint (system only, tools implicit)
+		// SYSTEM_ONLY: system cached, tools not explicitly cached
 		CacheEligibilityResolver systemOnly = CacheEligibilityResolver
 			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build());
-		assertThat(systemOnly.resolveToolCacheControl()).as("SYSTEM_ONLY should not explicitly cache tools").isNull();
+		assertThat(systemOnly.resolveToolCacheControl()).isNull();
 		assertThat(systemOnly.resolve(MessageType.SYSTEM, "content")).isNotNull();
 
-		// TOOLS_ONLY: 1 breakpoint (tools only)
+		// TOOLS_ONLY: tools cached, system not cached
 		CacheEligibilityResolver toolsOnly = CacheEligibilityResolver
 			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
-		assertThat(toolsOnly.resolveToolCacheControl()).as("TOOLS_ONLY should cache tools").isNotNull();
-		assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).as("TOOLS_ONLY should not cache system").isNull();
+		assertThat(toolsOnly.resolveToolCacheControl()).isNotNull();
+		assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).isNull();
 
-		// SYSTEM_AND_TOOLS: 2 breakpoints (tools + system)
+		// SYSTEM_AND_TOOLS: both cached
 		CacheEligibilityResolver systemAndTools = CacheEligibilityResolver
 			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build());
-		assertThat(systemAndTools.resolveToolCacheControl()).as("SYSTEM_AND_TOOLS should cache tools").isNotNull();
-		assertThat(systemAndTools.resolve(MessageType.SYSTEM, "content")).as("SYSTEM_AND_TOOLS should cache system")
-			.isNotNull();
+		assertThat(systemAndTools.resolveToolCacheControl()).isNotNull();
+		assertThat(systemAndTools.resolve(MessageType.SYSTEM, "content")).isNotNull();
 	}
 
 	@Test
@@ -186,7 +168,7 @@ void messageTypeEligibilityPerStrategy() {
 		assertThat(systemOnly.resolve(MessageType.ASSISTANT, "content")).isNull();
 		assertThat(systemOnly.resolve(MessageType.TOOL, "content")).isNull();
 
-		// TOOLS_ONLY: No message types eligible (only tool definitions)
+		// TOOLS_ONLY: No message types eligible
 		CacheEligibilityResolver toolsOnly = CacheEligibilityResolver
 			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
 		assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).isNull();
@@ -211,42 +193,16 @@ void messageTypeEligibilityPerStrategy() {
 		assertThat(history.resolve(MessageType.TOOL, "content")).isNotNull();
 	}
 
-	@Test
-	void toolsOnlyIsolationFromSystemChanges() {
-		// Validates that TOOLS_ONLY resolver behavior is consistent
-		// regardless of system message content (simulating different system prompts)
-		CacheEligibilityResolver resolver = CacheEligibilityResolver
-			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
-
-		// Different system prompts should all be ineligible for caching
-		assertThat(resolver.resolve(MessageType.SYSTEM, "You are a helpful assistant"))
-			.as("System prompt 1 should not be cached")
-			.isNull();
-		assertThat(resolver.resolve(MessageType.SYSTEM, "You are a STRICT validator"))
-			.as("System prompt 2 should not be cached")
-			.isNull();
-		assertThat(resolver.resolve(MessageType.SYSTEM, "You are a creative writer"))
-			.as("System prompt 3 should not be cached")
-			.isNull();
-
-		// Tool cache eligibility should remain consistent
-		assertThat(resolver.resolveToolCacheControl()).as("Tools should always be cacheable").isNotNull();
-	}
-
 	@Test
 	void systemAndToolsIndependentBreakpoints() {
-		// Validates that SYSTEM_AND_TOOLS creates two independent eligibility checks
 		CacheEligibilityResolver resolver = CacheEligibilityResolver
 			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build());
 
-		// Both tools and system should be independently eligible
-		AnthropicApi.ChatCompletionRequest.CacheControl toolCache = resolver.resolveToolCacheControl();
-		AnthropicApi.ChatCompletionRequest.CacheControl systemCache = resolver.resolve(MessageType.SYSTEM, "content");
-
-		assertThat(toolCache).as("Tools should be cacheable").isNotNull();
-		assertThat(systemCache).as("System should be cacheable").isNotNull();
+		CacheControlEphemeral toolCache = resolver.resolveToolCacheControl();
+		CacheControlEphemeral systemCache = resolver.resolve(MessageType.SYSTEM, "content");
 
-		// They should use the same TTL (both use SYSTEM message type TTL)
+		assertThat(toolCache).isNotNull();
+		assertThat(systemCache).isNotNull();
 		assertThat(toolCache.ttl()).isEqualTo(systemCache.ttl());
 	}
 
@@ -257,17 +213,17 @@ void breakpointLimitEnforced() {
 			.build();
 		CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options);
 
-		// Use up breakpoints by resolving multiple times
-		resolver.resolve(MessageType.SYSTEM, "content"); // Uses breakpoint 1
+		// Use up breakpoints
+		resolver.resolve(MessageType.SYSTEM, "content");
 		resolver.useCacheBlock();
-		resolver.resolve(MessageType.USER, "content"); // Uses breakpoint 2
+		resolver.resolve(MessageType.USER, "content");
 		resolver.useCacheBlock();
-		resolver.resolve(MessageType.ASSISTANT, "content"); // Uses breakpoint 3
+		resolver.resolve(MessageType.ASSISTANT, "content");
 		resolver.useCacheBlock();
-		resolver.resolve(MessageType.TOOL, "content"); // Uses breakpoint 4
+		resolver.resolve(MessageType.TOOL, "content");
 		resolver.useCacheBlock();
 
-		// 5th attempt should return null (all 4 breakpoints used)
+		// 5th attempt should return null
 		assertThat(resolver.resolve(MessageType.USER, "more content"))
 			.as("Should return null when all 4 breakpoints are used")
 			.isNull();
@@ -278,16 +234,25 @@ void emptyAndNullContentHandling() {
 		CacheEligibilityResolver resolver = CacheEligibilityResolver
 			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build());
 
-		// Empty string should not be cached
 		assertThat(resolver.resolve(MessageType.SYSTEM, "")).as("Empty string should not be cached").isNull();
-
-		// Null should not be cached
 		assertThat(resolver.resolve(MessageType.SYSTEM, null)).as("Null content should not be cached").isNull();
-
-		// Whitespace-only should be cached if it meets length requirement
 		assertThat(resolver.resolve(MessageType.SYSTEM, "   "))
 			.as("Whitespace-only content meeting length requirements should be cacheable")
 			.isNotNull();
 	}
 
+	@Test
+	void oneHourTtlReturnedForConfiguredMessageType() {
+		AnthropicCacheOptions options = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
+			.build();
+		CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options);
+
+		CacheControlEphemeral cc = resolver.resolve(MessageType.SYSTEM, "enough content");
+		assertThat(cc).isNotNull();
+		// The TTL configured for SYSTEM above must surface as the SDK's one-hour constant.
+		assertThat(cc.ttl()).contains(CacheControlEphemeral.Ttl.TTL_1H);
+	}
+
 }
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/ChatCompletionRequestTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/ChatCompletionRequestTests.java
deleted file mode 100644
index a31844a91fd..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/ChatCompletionRequestTests.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic;
-
-import org.junit.jupiter.api.Test;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.chat.prompt.Prompt;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * @author Christian Tzolov
- * @author Alexandros Pappas
- * @author Thomas Vitale
- */
-public class ChatCompletionRequestTests {
-
-	@Test
-	public void createRequestWithChatOptions() {
-
-		var client = AnthropicChatModel.builder()
-			.anthropicApi(AnthropicApi.builder().apiKey("TEST").build())
-			.defaultOptions(
-					AnthropicChatOptions.builder().model("DEFAULT_MODEL").maxTokens(500).temperature(66.6).build())
-			.build();
-
-		var prompt = client.buildRequestPrompt(new Prompt("Test message content"));
-
-		var request = client.createRequest(prompt, false);
-
-		assertThat(request.messages()).hasSize(1);
-		assertThat(request.stream()).isFalse();
-
-		assertThat(request.model()).isEqualTo("DEFAULT_MODEL");
-		assertThat(request.temperature()).isEqualTo(66.6);
-
-		prompt = client.buildRequestPrompt(new Prompt("Test message content",
-				AnthropicChatOptions.builder().model("PROMPT_MODEL").temperature(99.9).build()));
-
-		request = client.createRequest(prompt, true);
-
-		assertThat(request.messages()).hasSize(1);
-		assertThat(request.stream()).isTrue();
-
-		assertThat(request.model()).isEqualTo("PROMPT_MODEL");
-		assertThat(request.temperature()).isEqualTo(99.9);
-	}
-
-	@Test
-	public void createRequestWithToolChoice() {
-
-		var client = AnthropicChatModel.builder()
-			.anthropicApi(AnthropicApi.builder().apiKey("TEST").build())
-			.defaultOptions(AnthropicChatOptions.builder().model("DEFAULT_MODEL").maxTokens(500).build())
-			.build();
-
-		// Test with ToolChoiceAuto
-		var autoToolChoice = new AnthropicApi.ToolChoiceAuto();
-		var prompt = client.buildRequestPrompt(
-				new Prompt("Test message content", AnthropicChatOptions.builder().toolChoice(autoToolChoice).build()));
-
-		var request = client.createRequest(prompt, false);
-
-		assertThat(request.toolChoice()).isNotNull();
-		assertThat(request.toolChoice()).isInstanceOf(AnthropicApi.ToolChoiceAuto.class);
-		assertThat(request.toolChoice().type()).isEqualTo("auto");
-
-		// Test with ToolChoiceAny
-		var anyToolChoice = new AnthropicApi.ToolChoiceAny();
-		prompt = client.buildRequestPrompt(
-				new Prompt("Test message content", AnthropicChatOptions.builder().toolChoice(anyToolChoice).build()));
-
-		request = client.createRequest(prompt, false);
-
-		assertThat(request.toolChoice()).isNotNull();
-		assertThat(request.toolChoice()).isInstanceOf(AnthropicApi.ToolChoiceAny.class);
-		assertThat(request.toolChoice().type()).isEqualTo("any");
-
-		// Test with ToolChoiceTool
-		var specificToolChoice = new AnthropicApi.ToolChoiceTool("get_weather");
-		prompt = client.buildRequestPrompt(new Prompt("Test message content",
-				AnthropicChatOptions.builder().toolChoice(specificToolChoice).build()));
-
-		request = client.createRequest(prompt, false);
-
-		assertThat(request.toolChoice()).isNotNull();
-		assertThat(request.toolChoice()).isInstanceOf(AnthropicApi.ToolChoiceTool.class);
-		assertThat(request.toolChoice().type()).isEqualTo("tool");
-		assertThat(((AnthropicApi.ToolChoiceTool) request.toolChoice()).name()).isEqualTo("get_weather");
-
-		// Test with ToolChoiceNone
-		var noneToolChoice = new AnthropicApi.ToolChoiceNone();
-		prompt = client.buildRequestPrompt(
-				new Prompt("Test message content", AnthropicChatOptions.builder().toolChoice(noneToolChoice).build()));
-
-		request = client.createRequest(prompt, false);
-
-		assertThat(request.toolChoice()).isNotNull();
-		assertThat(request.toolChoice()).isInstanceOf(AnthropicApi.ToolChoiceNone.class);
-		assertThat(request.toolChoice().type()).isEqualTo("none");
-
-		// Test with disableParallelToolUse
-		var autoWithDisabledParallel = new AnthropicApi.ToolChoiceAuto(true);
-		prompt = client.buildRequestPrompt(new Prompt("Test message content",
-				AnthropicChatOptions.builder().toolChoice(autoWithDisabledParallel).build()));
-
-		request = client.createRequest(prompt, false);
-
-		assertThat(request.toolChoice()).isNotNull();
-		assertThat(request.toolChoice()).isInstanceOf(AnthropicApi.ToolChoiceAuto.class);
-		assertThat(((AnthropicApi.ToolChoiceAuto) request.toolChoice()).disableParallelToolUse()).isTrue();
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/EventParsingTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/EventParsingTests.java
deleted file mode 100644
index 788a2673f17..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/EventParsingTests.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic;
-
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import tools.jackson.core.type.TypeReference;
-import tools.jackson.databind.json.JsonMapper;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.StreamEvent;
-import org.springframework.core.io.DefaultResourceLoader;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * @author Christian Tzolov
- * @since 1.0.0
- */
-public class EventParsingTests {
-
-	private static final Logger logger = LoggerFactory.getLogger(EventParsingTests.class);
-
-	@Test
-	public void readEvents() throws IOException {
-		String json = new DefaultResourceLoader().getResource("classpath:/sample_events.json")
-			.getContentAsString(Charset.defaultCharset());
-
-		List<StreamEvent> events = JsonMapper.shared().readerFor(new TypeReference<>() {
-		}).readValue(json);
-
-		logger.info(events.toString());
-
-		assertThat(events).hasSize(31);
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/aot/AnthropicRuntimeHintsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/aot/AnthropicRuntimeHintsTests.java
deleted file mode 100644
index efad955e69c..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/aot/AnthropicRuntimeHintsTests.java
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.aot;
-
-import java.util.HashSet;
-import java.util.Set;
-
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.aot.hint.MemberCategory;
-import org.springframework.aot.hint.RuntimeHints;
-import org.springframework.aot.hint.TypeReference;
-
-import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
-import static org.springframework.ai.aot.AiRuntimeHints.findJsonAnnotatedClassesInPackage;
-
-class AnthropicRuntimeHintsTests {
-
-	private RuntimeHints runtimeHints;
-
-	private AnthropicRuntimeHints anthropicRuntimeHints;
-
-	@BeforeEach
-	void setUp() {
-		this.runtimeHints = new RuntimeHints();
-		this.anthropicRuntimeHints = new AnthropicRuntimeHints();
-	}
-
-	@Test
-	void registerHints() {
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-
-		Set<TypeReference> jsonAnnotatedClasses = findJsonAnnotatedClassesInPackage("org.springframework.ai.anthropic");
-
-		Set<TypeReference> registeredTypes = new HashSet<>();
-		this.runtimeHints.reflection().typeHints().forEach(typeHint -> registeredTypes.add(typeHint.getType()));
-
-		for (TypeReference jsonAnnotatedClass : jsonAnnotatedClasses) {
-			assertThat(registeredTypes.contains(jsonAnnotatedClass)).isTrue();
-		}
-
-		// Check a few more specific ones
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.Role.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.ThinkingType.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.EventType.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.ContentBlock.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.ChatCompletionRequest.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.AnthropicMessage.class))).isTrue();
-	}
-
-	@Test
-	void registerHintsWithNullClassLoader() {
-		// Test that registering hints with null ClassLoader works correctly
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-
-		Set<TypeReference> registeredTypes = new HashSet<>();
-		this.runtimeHints.reflection().typeHints().forEach(typeHint -> registeredTypes.add(typeHint.getType()));
-
-		assertThat(registeredTypes.size()).isGreaterThan(0);
-	}
-
-	@Test
-	void registerHintsWithCustomClassLoader() {
-		// Test that registering hints with a custom ClassLoader works correctly
-		ClassLoader customClassLoader = Thread.currentThread().getContextClassLoader();
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, customClassLoader);
-
-		Set<TypeReference> registeredTypes = new HashSet<>();
-		this.runtimeHints.reflection().typeHints().forEach(typeHint -> registeredTypes.add(typeHint.getType()));
-
-		assertThat(registeredTypes.size()).isGreaterThan(0);
-	}
-
-	@Test
-	void allMemberCategoriesAreRegistered() {
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-
-		Set<TypeReference> jsonAnnotatedClasses = findJsonAnnotatedClassesInPackage("org.springframework.ai.anthropic");
-
-		// Verify that all MemberCategory values are registered for each type
-		this.runtimeHints.reflection().typeHints().forEach(typeHint -> {
-			if (jsonAnnotatedClasses.contains(typeHint.getType())) {
-				Set<MemberCategory> expectedCategories = Set.of(MemberCategory.values());
-				Set<MemberCategory> actualCategories = typeHint.getMemberCategories();
-				assertThat(actualCategories.containsAll(expectedCategories)).isTrue();
-			}
-		});
-	}
-
-	@Test
-	void emptyRuntimeHintsInitiallyContainsNoTypes() {
-		// Verify that fresh RuntimeHints instance contains no reflection hints
-		RuntimeHints emptyHints = new RuntimeHints();
-		Set<TypeReference> emptyRegisteredTypes = new HashSet<>();
-		emptyHints.reflection().typeHints().forEach(typeHint -> emptyRegisteredTypes.add(typeHint.getType()));
-
-		assertThat(emptyRegisteredTypes.size()).isEqualTo(0);
-	}
-
-	@Test
-	void multipleRegistrationCallsAreIdempotent() {
-		// Register hints multiple times and verify no duplicates
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-		int firstRegistrationCount = (int) this.runtimeHints.reflection().typeHints().count();
-
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-		int secondRegistrationCount = (int) this.runtimeHints.reflection().typeHints().count();
-
-		assertThat(firstRegistrationCount).isEqualTo(secondRegistrationCount);
-	}
-
-	@Test
-	void verifyJsonAnnotatedClassesInPackageIsNotEmpty() {
-		Set<TypeReference> jsonAnnotatedClasses = findJsonAnnotatedClassesInPackage("org.springframework.ai.anthropic");
-		assertThat(jsonAnnotatedClasses.isEmpty()).isFalse();
-	}
-
-	@Test
-	void verifyEnumTypesAreRegistered() {
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-
-		Set<TypeReference> registeredTypes = new HashSet<>();
-		this.runtimeHints.reflection().typeHints().forEach(typeHint -> registeredTypes.add(typeHint.getType()));
-
-		// Verify enum types are properly registered
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.Role.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.ThinkingType.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.EventType.class))).isTrue();
-	}
-
-	@Test
-	void verifyNestedClassesAreRegistered() {
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-
-		Set<TypeReference> registeredTypes = new HashSet<>();
-		this.runtimeHints.reflection().typeHints().forEach(typeHint -> registeredTypes.add(typeHint.getType()));
-
-		// Verify nested classes within AnthropicApi are registered
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.ChatCompletionRequest.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.AnthropicMessage.class))).isTrue();
-		assertThat(registeredTypes.contains(TypeReference.of(AnthropicApi.ContentBlock.class))).isTrue();
-	}
-
-	@Test
-	void verifyNoProxyHintsAreRegistered() {
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-
-		// This implementation should only register reflection hints, not proxy hints
-		long proxyHintCount = this.runtimeHints.proxies().jdkProxyHints().count();
-		assertThat(proxyHintCount).isEqualTo(0);
-	}
-
-	@Test
-	void verifyNoSerializationHintsAreRegistered() {
-		this.anthropicRuntimeHints.registerHints(this.runtimeHints, null);
-
-		// This implementation should only register reflection hints, not serialization
-		// hints
-		long serializationHintCount = this.runtimeHints.serialization().javaSerializationHints().count();
-		assertThat(serializationHintCount).isEqualTo(0);
-	}
-
-	@Test
-	void verifyJsonAnnotatedClassesContainExpectedTypes() {
-		Set<TypeReference> jsonAnnotatedClasses = findJsonAnnotatedClassesInPackage("org.springframework.ai.anthropic");
-
-		// Verify that key API classes are found
-		boolean containsApiClass = jsonAnnotatedClasses.stream()
-			.anyMatch(typeRef -> typeRef.getName().contains("AnthropicApi")
-					|| typeRef.getName().contains("ChatCompletion") || typeRef.getName().contains("AnthropicMessage"));
-
-		assertThat(containsApiClass).isTrue();
-	}
-
-	@Test
-	void verifyConsistencyAcrossInstances() {
-		RuntimeHints hints1 = new RuntimeHints();
-		RuntimeHints hints2 = new RuntimeHints();
-
-		AnthropicRuntimeHints anthropicHints1 = new AnthropicRuntimeHints();
-		AnthropicRuntimeHints anthropicHints2 = new AnthropicRuntimeHints();
-
-		anthropicHints1.registerHints(hints1, null);
-		anthropicHints2.registerHints(hints2, null);
-
-		// Different instances should register the same hints
-		Set<TypeReference> types1 = new HashSet<>();
-		Set<TypeReference> types2 = new HashSet<>();
-
-		hints1.reflection().typeHints().forEach(hint -> types1.add(hint.getType()));
-		hints2.reflection().typeHints().forEach(hint -> types2.add(hint.getType()));
-
-		assertThat(types1).isEqualTo(types2);
-	}
-
-	@Test
-	void verifyPackageSpecificity() {
-		Set<TypeReference> jsonAnnotatedClasses = findJsonAnnotatedClassesInPackage("org.springframework.ai.anthropic");
-
-		// All found classes should be from the anthropic package specifically
-		for (TypeReference classRef : jsonAnnotatedClasses) {
-			assertThat(classRef.getName()).startsWith("org.springframework.ai.anthropic");
-		}
-
-		// Should not include classes from other AI packages
-		for (TypeReference classRef : jsonAnnotatedClasses) {
-			assertThat(classRef.getName()).doesNotContain("vertexai");
-			assertThat(classRef.getName()).doesNotContain("openai");
-			assertThat(classRef.getName()).doesNotContain("ollama");
-		}
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiBuilderTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiBuilderTests.java
deleted file mode 100644
index 1ececae4c56..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiBuilderTests.java
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.io.IOException;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Objects;
-import java.util.Queue;
-
-import okhttp3.mockwebserver.MockResponse;
-import okhttp3.mockwebserver.MockWebServer;
-import okhttp3.mockwebserver.RecordedRequest;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Nested;
-import org.junit.jupiter.api.Test;
-import org.opentest4j.AssertionFailedError;
-
-import org.springframework.ai.model.ApiKey;
-import org.springframework.ai.model.SimpleApiKey;
-import org.springframework.http.HttpHeaders;
-import org.springframework.http.HttpStatus;
-import org.springframework.http.MediaType;
-import org.springframework.http.ResponseEntity;
-import org.springframework.web.client.ResponseErrorHandler;
-import org.springframework.web.client.RestClient;
-import org.springframework.web.reactive.function.client.WebClient;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-import static org.mockito.Mockito.mock;
-
-/**
- * @author Filip Hrisafov
- * @author Oleksandr Klymenko
- */
-public class AnthropicApiBuilderTests {
-
-	private static final ApiKey TEST_API_KEY = new SimpleApiKey("test-api-key");
-
-	private static final String TEST_BASE_URL = "https://test.anthropic.com";
-
-	private static final String TEST_COMPLETIONS_PATH = "/test/completions";
-
-	@Test
-	void testMinimalBuilder() {
-		AnthropicApi api = AnthropicApi.builder().apiKey(TEST_API_KEY).build();
-
-		assertThat(api).isNotNull();
-	}
-
-	@Test
-	void testFullBuilder() {
-		RestClient.Builder restClientBuilder = RestClient.builder();
-		WebClient.Builder webClientBuilder = WebClient.builder();
-		ResponseErrorHandler errorHandler = mock(ResponseErrorHandler.class);
-
-		AnthropicApi api = AnthropicApi.builder()
-			.apiKey(TEST_API_KEY)
-			.baseUrl(TEST_BASE_URL)
-			.completionsPath(TEST_COMPLETIONS_PATH)
-			.restClientBuilder(restClientBuilder)
-			.webClientBuilder(webClientBuilder)
-			.responseErrorHandler(errorHandler)
-			.build();
-
-		assertThat(api).isNotNull();
-	}
-
-	@Test
-	void testMissingApiKey() {
-		assertThatThrownBy(() -> AnthropicApi.builder().build()).isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("apiKey must be set");
-	}
-
-	@Test
-	void testInvalidBaseUrl() {
-		assertThatThrownBy(() -> AnthropicApi.builder().baseUrl("").build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("baseUrl cannot be null or empty");
-
-		assertThatThrownBy(() -> AnthropicApi.builder().baseUrl(null).build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("baseUrl cannot be null or empty");
-	}
-
-	@Test
-	void testInvalidCompletionsPath() {
-		assertThatThrownBy(() -> AnthropicApi.builder().completionsPath("").build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("completionsPath cannot be null or empty");
-
-		assertThatThrownBy(() -> AnthropicApi.builder().completionsPath(null).build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("completionsPath cannot be null or empty");
-	}
-
-	@Test
-	void testInvalidRestClientBuilder() {
-		assertThatThrownBy(() -> AnthropicApi.builder().restClientBuilder(null).build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("restClientBuilder cannot be null");
-	}
-
-	@Test
-	void testInvalidWebClientBuilder() {
-		assertThatThrownBy(() -> AnthropicApi.builder().webClientBuilder(null).build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("webClientBuilder cannot be null");
-	}
-
-	@Test
-	void testInvalidResponseErrorHandler() {
-		assertThatThrownBy(() -> AnthropicApi.builder().responseErrorHandler(null).build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("responseErrorHandler cannot be null");
-	}
-
-	@Test
-	void testApiKeyStringOverload() {
-		AnthropicApi api = AnthropicApi.builder().apiKey("test-string-key").build();
-
-		assertThat(api).isNotNull();
-	}
-
-	@Test
-	void testInvalidAnthropicVersion() {
-		assertThatThrownBy(() -> AnthropicApi.builder().apiKey(TEST_API_KEY).anthropicVersion(null).build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("anthropicVersion cannot be null");
-	}
-
-	@Test
-	void testInvalidAnthropicBetaFeatures() {
-		assertThatThrownBy(() -> AnthropicApi.builder().apiKey(TEST_API_KEY).anthropicBetaFeatures(null).build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("anthropicBetaFeatures cannot be null");
-	}
-
-	@Test
-	void testDefaultValues() {
-		AnthropicApi api = AnthropicApi.builder().apiKey(TEST_API_KEY).build();
-
-		assertThat(api).isNotNull();
-	}
-
-	@Test
-	void testBuilderIndependence() {
-		AnthropicApi.Builder builder1 = AnthropicApi.builder().apiKey("key1").baseUrl("https://api1.example.com");
-
-		AnthropicApi.Builder builder2 = AnthropicApi.builder().apiKey("key2").baseUrl("https://api2.example.com");
-
-		AnthropicApi api1 = builder1.build();
-		AnthropicApi api2 = builder2.build();
-
-		assertThat(api1).isNotNull();
-		assertThat(api2).isNotNull();
-	}
-
-	@Test
-	void testCustomAnthropicVersionAndBetaFeatures() {
-		AnthropicApi api = AnthropicApi.builder()
-			.apiKey(TEST_API_KEY)
-			.anthropicVersion("version")
-			.anthropicBetaFeatures("custom-beta-feature")
-			.build();
-
-		assertThat(api).isNotNull();
-	}
-
-	@Test
-	void testApiKeyStringNullValidation() {
-		assertThatThrownBy(() -> AnthropicApi.builder().apiKey((String) null).build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("simpleApiKey cannot be null");
-	}
-
-	@Test
-	void testChainedBuilderMethods() {
-		AnthropicApi api = AnthropicApi.builder()
-			.baseUrl(TEST_BASE_URL)
-			.completionsPath(TEST_COMPLETIONS_PATH)
-			.apiKey(TEST_API_KEY)
-			.anthropicBetaFeatures("feature1,feature2")
-			.restClientBuilder(RestClient.builder())
-			.webClientBuilder(WebClient.builder())
-			.responseErrorHandler(mock(ResponseErrorHandler.class))
-			.build();
-
-		assertThat(api).isNotNull();
-	}
-
-	@Nested
-	class MockRequests {
-
-		MockWebServer mockWebServer;
-
-		@BeforeEach
-		void setUp() throws IOException {
-			this.mockWebServer = new MockWebServer();
-			this.mockWebServer.start();
-		}
-
-		@AfterEach
-		void tearDown() throws IOException {
-			this.mockWebServer.shutdown();
-		}
-
-		@Test
-		void dynamicApiKeyRestClient() throws InterruptedException {
-			Queue<ApiKey> apiKeys = new LinkedList<>(List.of(new SimpleApiKey("key1"), new SimpleApiKey("key2")));
-			AnthropicApi api = AnthropicApi.builder()
-				.apiKey(() -> Objects.requireNonNull(apiKeys.poll()).getValue())
-				.baseUrl(this.mockWebServer.url("/").toString())
-				.build();
-
-			MockResponse mockResponse = new MockResponse().setResponseCode(200)
-				.addHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
-				.setBody("""
-						{
-							"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
-						 	"type": "message",
-						 	"role": "assistant",
-						 	"content": [],
-						 	"model": "claude-opus-3-latest",
-						 	"max_tokens": 500,
-						 	"stop_reason": null,
-						 	"stop_sequence": null,
-							 "usage": {
-						     	"input_tokens": 25,
-						     	"output_tokens": 1
-							}
-						}
-						""");
-			this.mockWebServer.enqueue(mockResponse);
-			this.mockWebServer.enqueue(mockResponse);
-
-			AnthropicApi.AnthropicMessage chatCompletionMessage = new AnthropicApi.AnthropicMessage(
-					List.of(new AnthropicApi.ContentBlock("Hello world")), AnthropicApi.Role.USER);
-			AnthropicApi.ChatCompletionRequest request = AnthropicApi.ChatCompletionRequest.builder()
-				.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5)
-				.maxTokens(500)
-				.temperature(0.8)
-				.messages(List.of(chatCompletionMessage))
-				.build();
-			ResponseEntity<AnthropicApi.ChatCompletionResponse> response = api.chatCompletionEntity(request);
-			assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
-			RecordedRequest recordedRequest = this.mockWebServer.takeRequest();
-			assertThat(recordedRequest.getHeader(HttpHeaders.AUTHORIZATION)).isNull();
-			assertThat(recordedRequest.getHeader("x-api-key")).isEqualTo("key1");
-
-			response = api.chatCompletionEntity(request);
-			assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
-
-			recordedRequest = this.mockWebServer.takeRequest();
-			assertThat(recordedRequest.getHeader(HttpHeaders.AUTHORIZATION)).isNull();
-			assertThat(recordedRequest.getHeader("x-api-key")).isEqualTo("key2");
-		}
-
-		@Test
-		void dynamicApiKeyRestClientWithAdditionalApiKeyHeader() throws InterruptedException {
-			AnthropicApi api = AnthropicApi.builder().apiKey(() -> {
-				throw new AssertionFailedError("Should not be called, API key is provided in headers");
-			}).baseUrl(this.mockWebServer.url("/").toString()).build();
-
-			MockResponse mockResponse = new MockResponse().setResponseCode(200)
-				.addHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
-				.setBody("""
-						{
-							"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
-						 	"type": "message",
-						 	"role": "assistant",
-						 	"content": [],
-						 	"model": "claude-opus-3-latest",
-						 	"max_tokens": 500,
-						 	"stop_reason": null,
-						 	"stop_sequence": null,
-							 "usage": {
-						     	"input_tokens": 25,
-						     	"output_tokens": 1
-							}
-						}
-						""");
-			this.mockWebServer.enqueue(mockResponse);
-
-			AnthropicApi.AnthropicMessage chatCompletionMessage = new AnthropicApi.AnthropicMessage(
-					List.of(new AnthropicApi.ContentBlock("Hello world")), AnthropicApi.Role.USER);
-			AnthropicApi.ChatCompletionRequest request = AnthropicApi.ChatCompletionRequest.builder()
-				.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5)
-				.maxTokens(500)
-				.temperature(0.8)
-				.messages(List.of(chatCompletionMessage))
-				.build();
-			var additionalHeaders = new HttpHeaders();
-			additionalHeaders.add("x-api-key", "additional-key");
-			ResponseEntity<AnthropicApi.ChatCompletionResponse> response = api.chatCompletionEntity(request,
-					additionalHeaders);
-			assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
-			RecordedRequest recordedRequest = this.mockWebServer.takeRequest();
-			assertThat(recordedRequest.getHeader(HttpHeaders.AUTHORIZATION)).isNull();
-			assertThat(recordedRequest.getHeader("x-api-key")).isEqualTo("additional-key");
-		}
-
-		@Test
-		void dynamicApiKeyWebClient() throws InterruptedException {
-			Queue<ApiKey> apiKeys = new LinkedList<>(List.of(new SimpleApiKey("key1"), new SimpleApiKey("key2")));
-			AnthropicApi api = AnthropicApi.builder()
-				.apiKey(() -> Objects.requireNonNull(apiKeys.poll()).getValue())
-				.baseUrl(this.mockWebServer.url("/").toString())
-				.build();
-
-			MockResponse mockResponse = new MockResponse().setResponseCode(200)
-				.addHeader(HttpHeaders.CONTENT_TYPE, MediaType.TEXT_EVENT_STREAM_VALUE)
-				.setBody("""
-						{
-							"type": "message_start",
-							"message": {
-								"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
-								"type": "message",
-								"role": "assistant",
-								"content": [],
-								"model": "claude-opus-4-20250514",
-								"max_tokens": 500,
-								"stop_reason": null,
-								"stop_sequence": null,
-								"usage": {
-									"input_tokens": 25,
-									"output_tokens": 1
-								}
-							}
-						}
-						""".replace("\n", ""));
-			this.mockWebServer.enqueue(mockResponse);
-			this.mockWebServer.enqueue(mockResponse);
-
-			AnthropicApi.AnthropicMessage chatCompletionMessage = new AnthropicApi.AnthropicMessage(
-					List.of(new AnthropicApi.ContentBlock("Hello world")), AnthropicApi.Role.USER);
-			AnthropicApi.ChatCompletionRequest request = AnthropicApi.ChatCompletionRequest.builder()
-				.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5)
-				.maxTokens(500)
-				.temperature(0.8)
-				.messages(List.of(chatCompletionMessage))
-				.stream(true)
-				.build();
-			api.chatCompletionStream(request).collectList().block();
-			RecordedRequest recordedRequest = this.mockWebServer.takeRequest();
-			assertThat(recordedRequest.getHeader(HttpHeaders.AUTHORIZATION)).isNull();
-			assertThat(recordedRequest.getHeader("x-api-key")).isEqualTo("key1");
-
-			api.chatCompletionStream(request).collectList().block();
-
-			recordedRequest = this.mockWebServer.takeRequest();
-			assertThat(recordedRequest.getHeader(HttpHeaders.AUTHORIZATION)).isNull();
-			assertThat(recordedRequest.getHeader("x-api-key")).isEqualTo("key2");
-		}
-
-		@Test
-		void dynamicApiKeyWebClientWithAdditionalApiKey() throws InterruptedException {
-			Queue<ApiKey> apiKeys = new LinkedList<>(List.of(new SimpleApiKey("key1"), new SimpleApiKey("key2")));
-			AnthropicApi api = AnthropicApi.builder()
-				.apiKey(() -> Objects.requireNonNull(apiKeys.poll()).getValue())
-				.baseUrl(this.mockWebServer.url("/").toString())
-				.build();
-
-			MockResponse mockResponse = new MockResponse().setResponseCode(200)
-				.addHeader(HttpHeaders.CONTENT_TYPE, MediaType.TEXT_EVENT_STREAM_VALUE)
-				.setBody("""
-						{
-							"type": "message_start",
-							"message": {
-								"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
-								"type": "message",
-								"role": "assistant",
-								"content": [],
-								"model": "claude-opus-4-20250514",
-								"max_tokens": 500,
-								"stop_reason": null,
-								"stop_sequence": null,
-								"usage": {
-									"input_tokens": 25,
-									"output_tokens": 1
-								}
-							}
-						}
-						""".replace("\n", ""));
-			this.mockWebServer.enqueue(mockResponse);
-
-			AnthropicApi.AnthropicMessage chatCompletionMessage = new AnthropicApi.AnthropicMessage(
-					List.of(new AnthropicApi.ContentBlock("Hello world")), AnthropicApi.Role.USER);
-			AnthropicApi.ChatCompletionRequest request = AnthropicApi.ChatCompletionRequest.builder()
-				.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5)
-				.maxTokens(500)
-				.temperature(0.8)
-				.messages(List.of(chatCompletionMessage))
-				.stream(true)
-				.build();
-			var additionalHeaders = new HttpHeaders();
-			additionalHeaders.add("x-api-key", "additional-key");
-
-			api.chatCompletionStream(request, additionalHeaders).collectList().block();
-			RecordedRequest recordedRequest = this.mockWebServer.takeRequest();
-			assertThat(recordedRequest.getHeader(HttpHeaders.AUTHORIZATION)).isNull();
-			assertThat(recordedRequest.getHeader("x-api-key")).isEqualTo("additional-key");
-		}
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiFilesTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiFilesTests.java
deleted file mode 100644
index be2a676a381..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiFilesTests.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.FileMetadata;
-import org.springframework.ai.anthropic.api.AnthropicApi.FilesListResponse;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * Unit tests for Files API models in {@link AnthropicApi}.
- *
- * @author Soby Chacko
- * @since 2.0.0
- */
-class AnthropicApiFilesTests {
-
-	@Test
-	void shouldCreateFileMetadataRecord() {
-		FileMetadata metadata = new FileMetadata("file_abc123", "sales_report.xlsx", 12345L,
-				"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "2025-11-02T12:00:00Z",
-				"2025-11-03T12:00:00Z");
-
-		assertThat(metadata.id()).isEqualTo("file_abc123");
-		assertThat(metadata.filename()).isEqualTo("sales_report.xlsx");
-		assertThat(metadata.size()).isEqualTo(12345L);
-		assertThat(metadata.mimeType()).isEqualTo("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-		assertThat(metadata.createdAt()).isEqualTo("2025-11-02T12:00:00Z");
-		assertThat(metadata.expiresAt()).isEqualTo("2025-11-03T12:00:00Z");
-	}
-
-	@Test
-	void shouldCreateFileMetadataWithNullFields() {
-		FileMetadata metadata = new FileMetadata("file_123", "test.xlsx", 100L, "application/xlsx", null, null);
-
-		assertThat(metadata.id()).isEqualTo("file_123");
-		assertThat(metadata.filename()).isEqualTo("test.xlsx");
-		assertThat(metadata.size()).isEqualTo(100L);
-		assertThat(metadata.mimeType()).isEqualTo("application/xlsx");
-		assertThat(metadata.createdAt()).isNull();
-		assertThat(metadata.expiresAt()).isNull();
-	}
-
-	@Test
-	void shouldCreateFilesListResponse() {
-		List<FileMetadata> files = List.of(
-				new FileMetadata("file_1", "file1.xlsx", 100L, "application/xlsx", null, null),
-				new FileMetadata("file_2", "file2.pptx", 200L, "application/pptx", null, null));
-
-		FilesListResponse response = new FilesListResponse(files, true, "next_page_token");
-
-		assertThat(response.data()).hasSize(2);
-		assertThat(response.data().get(0).id()).isEqualTo("file_1");
-		assertThat(response.data().get(1).id()).isEqualTo("file_2");
-		assertThat(response.hasMore()).isTrue();
-		assertThat(response.nextPage()).isEqualTo("next_page_token");
-	}
-
-	@Test
-	void shouldCreateFilesListResponseWithEmptyList() {
-		FilesListResponse response = new FilesListResponse(List.of(), false, null);
-
-		assertThat(response.data()).isEmpty();
-		assertThat(response.hasMore()).isFalse();
-		assertThat(response.nextPage()).isNull();
-	}
-
-	@Test
-	void shouldCreateFilesListResponseWithMultiplePages() {
-		List<FileMetadata> page1 = List.of(
-				new FileMetadata("file_1", "file1.xlsx", 100L, "application/xlsx", "2025-11-02T10:00:00Z",
-						"2025-11-03T10:00:00Z"),
-				new FileMetadata("file_2", "file2.pptx", 200L, "application/pptx", "2025-11-02T11:00:00Z",
-						"2025-11-03T11:00:00Z"));
-
-		FilesListResponse response = new FilesListResponse(page1, true, "page_2_token");
-
-		assertThat(response.data()).hasSize(2);
-		assertThat(response.hasMore()).isTrue();
-		assertThat(response.nextPage()).isEqualTo("page_2_token");
-
-		// Verify metadata details
-		FileMetadata first = response.data().get(0);
-		assertThat(first.filename()).isEqualTo("file1.xlsx");
-		assertThat(first.size()).isEqualTo(100L);
-		assertThat(first.createdAt()).isEqualTo("2025-11-02T10:00:00Z");
-		assertThat(first.expiresAt()).isEqualTo("2025-11-03T10:00:00Z");
-	}
-
-	@Test
-	void shouldHandleDifferentFileTypes() {
-		List<FileMetadata> files = List.of(
-				new FileMetadata("file_1", "report.xlsx", 5000L,
-						"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", null, null),
-				new FileMetadata("file_2", "presentation.pptx", 15000L,
-						"application/vnd.openxmlformats-officedocument.presentationml.presentation", null, null),
-				new FileMetadata("file_3", "document.docx", 8000L,
-						"application/vnd.openxmlformats-officedocument.wordprocessingml.document", null, null),
-				new FileMetadata("file_4", "output.pdf", 25000L, "application/pdf", null, null));
-
-		FilesListResponse response = new FilesListResponse(files, false, null);
-
-		assertThat(response.data()).hasSize(4);
-		assertThat(response.data()).extracting(FileMetadata::filename)
-			.containsExactly("report.xlsx", "presentation.pptx", "document.docx", "output.pdf");
-		assertThat(response.data()).extracting(FileMetadata::mimeType)
-			.containsExactly("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-					"application/vnd.openxmlformats-officedocument.presentationml.presentation",
-					"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/pdf");
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java
deleted file mode 100644
index 62818a9af4a..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import reactor.core.publisher.Flux;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicMessage;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
-import org.springframework.ai.anthropic.api.AnthropicApi.EventType;
-import org.springframework.ai.anthropic.api.AnthropicApi.Role;
-import org.springframework.ai.model.ModelOptionsUtils;
-import org.springframework.http.ResponseEntity;
-import org.springframework.util.CollectionUtils;
-import org.springframework.util.StringUtils;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-
-/**
- * @author Christian Tzolov
- * @author Jihoon Kim
- * @author Alexandros Pappas
- * @author Claudio Silva Junior
- * @author Soby Chacko
- */
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-public class AnthropicApiIT {
-
-	private static final Logger logger = LoggerFactory.getLogger(AnthropicApiIT.class);
-
-	AnthropicApi anthropicApi = AnthropicApi.builder().apiKey(System.getenv("ANTHROPIC_API_KEY")).build();
-
-	List<AnthropicApi.Tool> tools = List.of(new AnthropicApi.Tool("getCurrentWeather",
-			"Get the weather in location. Return temperature in 30°F or 30°C format.", ModelOptionsUtils.jsonToMap("""
-					{
-						"type": "object",
-						"properties": {
-							"location": {
-								"type": "string",
-								"description": "The city and state e.g. San Francisco, CA"
-							},
-							"unit": {
-								"type": "string",
-								"enum": ["C", "F"]
-							}
-						},
-						"required": ["location", "unit"]
-					}
-					""")));
-
-	@Test
-	void chatWithPromptCache() {
-		String userMessageText = "It could be either a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which "
-				+ "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" "
-				+ "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English "
-				+ "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". "
-				+ "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\"";
-
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(
-				List.of(new ContentBlock(userMessageText.repeat(20), AnthropicCacheType.EPHEMERAL.cacheControl())),
-				Role.USER);
-
-		ChatCompletionRequest chatCompletionRequest = new ChatCompletionRequest(
-				AnthropicApi.ChatModel.CLAUDE_SONNET_4_6.getValue(), List.of(chatCompletionMessage), null, 100, 0.8,
-				false);
-
-		// First request - creates cache
-		AnthropicApi.Usage createdCacheToken = this.anthropicApi.chatCompletionEntity(chatCompletionRequest)
-			.getBody()
-			.usage();
-
-		assertThat(createdCacheToken.cacheCreationInputTokens()).isGreaterThan(0);
-		assertThat(createdCacheToken.cacheReadInputTokens()).isEqualTo(0);
-
-		// Second request - reads from cache (same request)
-		AnthropicApi.Usage readCacheToken = this.anthropicApi.chatCompletionEntity(chatCompletionRequest)
-			.getBody()
-			.usage();
-
-		assertThat(readCacheToken.cacheCreationInputTokens()).isEqualTo(0);
-		assertThat(readCacheToken.cacheReadInputTokens()).isGreaterThan(0);
-	}
-
-	@Test
-	void chatCompletionEntity() {
-
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
-				Role.USER);
-		ResponseEntity<ChatCompletionResponse> response = this.anthropicApi
-			.chatCompletionEntity(ChatCompletionRequest.builder()
-				.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
-				.messages(List.of(chatCompletionMessage))
-				.maxTokens(100)
-				.temperature(0.8)
-				.stream(false)
-				.build());
-
-		logger.info("Non-Streaming Response: {}", response.getBody());
-		assertThat(response).isNotNull();
-		assertThat(response.getBody()).isNotNull();
-		assertThat(response.getBody().content()).isNotEmpty();
-		assertThat(response.getBody().content().get(0).text()).isNotBlank();
-		assertThat(response.getBody().stopReason()).isEqualTo("end_turn");
-	}
-
-	@Test
-	void chatCompletionWithThinking() {
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(
-				List.of(new ContentBlock("Are there an infinite number of prime numbers such that n mod 4 == 3?")),
-				Role.USER);
-
-		ChatCompletionRequest request = ChatCompletionRequest.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
-			.messages(List.of(chatCompletionMessage))
-			.maxTokens(8192)
-			.temperature(1.0) // temperature should be set to 1 when thinking is enabled
-			.thinking(new ChatCompletionRequest.ThinkingConfig(AnthropicApi.ThinkingType.ENABLED, 2048))
-			.build();
-
-		ResponseEntity<ChatCompletionResponse> response = this.anthropicApi.chatCompletionEntity(request);
-
-		assertThat(response).isNotNull();
-		assertThat(response.getBody()).isNotNull();
-		assertThat(response.getBody().content()).isNotEmpty();
-
-		boolean foundThinkingBlock = false;
-		boolean foundTextBlock = false;
-
-		List<ContentBlock> content = response.getBody().content();
-		for (ContentBlock block : content) {
-			if (block.type() == ContentBlock.Type.THINKING) {
-				assertThat(block.thinking()).isNotBlank();
-				assertThat(block.signature()).isNotBlank();
-				foundThinkingBlock = true;
-			}
-			// Note: Redacted thinking might occur if budget is exceeded or other reasons.
-			if (block.type() == ContentBlock.Type.REDACTED_THINKING) {
-				assertThat(block.data()).isNotBlank();
-			}
-			if (block.type() == ContentBlock.Type.TEXT) {
-				assertThat(block.text()).isNotBlank();
-				foundTextBlock = true;
-			}
-		}
-
-		assertThat(foundThinkingBlock).isTrue();
-		assertThat(foundTextBlock).isTrue();
-		assertThat(response.getBody().stopReason()).isEqualTo("end_turn");
-	}
-
-	@Test
-	void chatCompletionStream() {
-
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
-				Role.USER);
-
-		Flux<ChatCompletionResponse> response = this.anthropicApi.chatCompletionStream(ChatCompletionRequest.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
-			.messages(List.of(chatCompletionMessage))
-			.maxTokens(100)
-			.temperature(0.8)
-			.stream(true)
-			.build());
-
-		assertThat(response).isNotNull();
-
-		List<ChatCompletionResponse> results = response.collectList().block();
-		assertThat(results).isNotNull().isNotEmpty();
-
-		results.forEach(chunk -> logger.info("Streaming Chunk: {}", chunk));
-
-		// Verify the stream contains actual text content deltas
-		String aggregatedText = results.stream()
-			.filter(r -> !CollectionUtils.isEmpty(r.content()))
-			.flatMap(r -> r.content().stream())
-			.filter(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA)
-			.map(ContentBlock::text)
-			.collect(Collectors.joining());
-		assertThat(aggregatedText).isNotBlank();
-
-		// Verify the final state
-		ChatCompletionResponse lastMeaningfulResponse = results.stream()
-			.filter(r -> StringUtils.hasText(r.stopReason()))
-			.reduce((first, second) -> second)
-			.orElse(results.get(results.size() - 1)); // Fallback to very last if no stop
-
-		// StopReason found earlier
-		assertThat(lastMeaningfulResponse.stopReason()).isEqualTo("end_turn");
-		assertThat(lastMeaningfulResponse.usage()).isNotNull();
-		assertThat(lastMeaningfulResponse.usage().outputTokens()).isPositive();
-	}
-
-	@Test
-	void chatCompletionStreamWithThinking() {
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(
-				List.of(new ContentBlock("Are there an infinite number of prime numbers such that n mod 4 == 3?")),
-				Role.USER);
-
-		ChatCompletionRequest request = ChatCompletionRequest.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
-			.messages(List.of(chatCompletionMessage))
-			.maxTokens(2048)
-			.temperature(1.0)
-			.stream(true)
-			.thinking(new ChatCompletionRequest.ThinkingConfig(AnthropicApi.ThinkingType.ENABLED, 1024))
-			.build();
-
-		Flux<ChatCompletionResponse> responseFlux = this.anthropicApi.chatCompletionStream(request);
-
-		assertThat(responseFlux).isNotNull();
-
-		List<ChatCompletionResponse> results = responseFlux.collectList().block();
-		assertThat(results).isNotNull().isNotEmpty();
-
-		results.forEach(chunk -> logger.info("Streaming Thinking Chunk: {}", chunk));
-
-		// Verify MESSAGE_START event exists
-		assertThat(results.stream().anyMatch(r -> EventType.MESSAGE_START.name().equals(r.type()))).isTrue();
-		assertThat(results.get(0).id()).isNotBlank();
-		assertThat(results.get(0).role()).isEqualTo(Role.ASSISTANT);
-
-		// Verify presence of THINKING_DELTA content
-		boolean foundThinkingDelta = results.stream()
-			.filter(r -> !CollectionUtils.isEmpty(r.content()))
-			.flatMap(r -> r.content().stream())
-			.anyMatch(cb -> cb.type() == ContentBlock.Type.THINKING_DELTA && StringUtils.hasText(cb.thinking()));
-		assertThat(foundThinkingDelta).as("Should find THINKING_DELTA content").isTrue();
-
-		// Verify presence of SIGNATURE_DELTA content
-		boolean foundSignatureDelta = results.stream()
-			.filter(r -> !CollectionUtils.isEmpty(r.content()))
-			.flatMap(r -> r.content().stream())
-			.anyMatch(cb -> cb.type() == ContentBlock.Type.SIGNATURE_DELTA && StringUtils.hasText(cb.signature()));
-		assertThat(foundSignatureDelta).as("Should find SIGNATURE_DELTA content").isTrue();
-
-		// Verify presence of TEXT_DELTA content (the actual answer)
-		boolean foundTextDelta = results.stream()
-			.filter(r -> !CollectionUtils.isEmpty(r.content()))
-			.flatMap(r -> r.content().stream())
-			.anyMatch(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA && StringUtils.hasText(cb.text()));
-		assertThat(foundTextDelta).as("Should find TEXT_DELTA content").isTrue();
-
-		// Combine text deltas to check final answer structure
-		String aggregatedText = results.stream()
-			.filter(r -> !CollectionUtils.isEmpty(r.content()))
-			.flatMap(r -> r.content().stream())
-			.filter(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA)
-			.map(ContentBlock::text)
-			.collect(Collectors.joining());
-		assertThat(aggregatedText).as("Aggregated text response should not be blank").isNotBlank();
-		logger.info("Aggregated Text from Stream: {}", aggregatedText);
-
-		// Verify the final state (stop reason and usage)
-		ChatCompletionResponse finalStateEvent = results.stream()
-			.filter(r -> StringUtils.hasText(r.stopReason()))
-			.reduce((first, second) -> second)
-			.orElse(null);
-
-		assertThat(finalStateEvent).as("Should find an event with stopReason").isNotNull();
-		assertThat(finalStateEvent.stopReason()).isEqualTo("end_turn");
-		assertThat(finalStateEvent.usage()).isNotNull();
-		assertThat(finalStateEvent.usage().outputTokens()).isPositive();
-		assertThat(finalStateEvent.usage().inputTokens()).isPositive();
-
-		// Verify presence of key event types
-		assertThat(results.stream().anyMatch(r -> EventType.CONTENT_BLOCK_START.name().equals(r.type())))
-			.as("Should find CONTENT_BLOCK_START event")
-			.isTrue();
-		assertThat(results.stream().anyMatch(r -> EventType.CONTENT_BLOCK_STOP.name().equals(r.type())))
-			.as("Should find CONTENT_BLOCK_STOP event")
-			.isTrue();
-		assertThat(results.stream()
-			.anyMatch(r -> EventType.MESSAGE_STOP.name().equals(r.type()) || StringUtils.hasText(r.stopReason())))
-			.as("Should find MESSAGE_STOP or MESSAGE_DELTA with stopReason")
-			.isTrue();
-	}
-
-	@Test
-	void chatCompletionStreamWithToolCall() {
-		List<AnthropicMessage> messageConversation = new ArrayList<>();
-
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(
-				List.of(new ContentBlock("What's the weather like in San Francisco? Show the temperature in Celsius.")),
-				Role.USER);
-
-		messageConversation.add(chatCompletionMessage);
-
-		ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
-			.messages(messageConversation)
-			.maxTokens(1500)
-			.stream(true)
-			.temperature(0.8)
-			.tools(this.tools)
-			.build();
-
-		List<ChatCompletionResponse> responses = this.anthropicApi.chatCompletionStream(chatCompletionRequest)
-			.collectList()
-			.block();
-
-		// Check that tool uses response returned only once
-		List<ChatCompletionResponse> toolCompletionResponses = responses.stream()
-			.filter(r -> r.stopReason() != null && r.stopReason().equals(ContentBlock.Type.TOOL_USE.value))
-			.toList();
-		assertThat(toolCompletionResponses).size().isEqualTo(1);
-		List<ContentBlock> toolContentBlocks = toolCompletionResponses.get(0).content();
-		assertThat(toolContentBlocks).size().isEqualTo(1);
-		ContentBlock toolContentBlock = toolContentBlocks.get(0);
-		assertThat(toolContentBlock.type()).isEqualTo(ContentBlock.Type.TOOL_USE);
-		assertThat(toolContentBlock.name()).isEqualTo("getCurrentWeather");
-
-		// Check that message stop response also returned
-		List<ChatCompletionResponse> messageStopEvents = responses.stream()
-			.filter(r -> r.type().equals(AnthropicApi.EventType.MESSAGE_STOP.name()))
-			.toList();
-		assertThat(messageStopEvents).size().isEqualTo(1);
-	}
-
-	@Test
-	void chatCompletionStreamError() {
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
-				Role.USER);
-		AnthropicApi api = AnthropicApi.builder().apiKey("FAKE_KEY_FOR_ERROR_RESPONSE").build();
-
-		Flux<ChatCompletionResponse> response = api.chatCompletionStream(ChatCompletionRequest.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
-			.messages(List.of(chatCompletionMessage))
-			.maxTokens(100)
-			.temperature(0.8)
-			.stream(true)
-			.build());
-
-		assertThat(response).isNotNull();
-
-		assertThatThrownBy(() -> response.collectList().block()).isInstanceOf(RuntimeException.class)
-			.hasMessageStartingWith("Response exception, Status: [")
-			.hasMessageContaining("\"type\":\"error\"")
-			.hasMessageContaining("\"type\":\"authentication_error\"")
-			.hasMessageContaining("\"message\":\"invalid x-api-key\"");
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiSkillTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiSkillTests.java
deleted file mode 100644
index 7ba2721a4d6..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiSkillTests.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicSkill;
-import org.springframework.ai.anthropic.api.AnthropicApi.Skill;
-import org.springframework.ai.anthropic.api.AnthropicApi.SkillContainer;
-import org.springframework.ai.anthropic.api.AnthropicApi.SkillType;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-
-/**
- * Unit tests for Anthropic Skills API models.
- *
- * @author Soby Chacko
- * @since 2.0.0
- */
-class AnthropicApiSkillTests {
-
-	@Test
-	void shouldCreateAnthropicSkill() {
-		Skill skill = Skill.builder().type(SkillType.ANTHROPIC).skillId("xlsx").version("20251013").build();
-
-		assertThat(skill.type()).isEqualTo(SkillType.ANTHROPIC);
-		assertThat(skill.skillId()).isEqualTo("xlsx");
-		assertThat(skill.version()).isEqualTo("20251013");
-	}
-
-	@Test
-	void shouldCreateCustomSkill() {
-		Skill skill = Skill.builder().type(SkillType.CUSTOM).skillId("custom-skill-id-12345").version("latest").build();
-
-		assertThat(skill.type()).isEqualTo(SkillType.CUSTOM);
-		assertThat(skill.skillId()).isEqualTo("custom-skill-id-12345");
-		assertThat(skill.version()).isEqualTo("latest");
-	}
-
-	@Test
-	void shouldDefaultToLatestVersion() {
-		Skill skill = new Skill(SkillType.ANTHROPIC, "xlsx");
-		assertThat(skill.version()).isEqualTo("latest");
-	}
-
-	@Test
-	void shouldCreateFromAnthropicSkillEnum() {
-		Skill skill = AnthropicSkill.XLSX.toSkill();
-
-		assertThat(skill.type()).isEqualTo(SkillType.ANTHROPIC);
-		assertThat(skill.skillId()).isEqualTo("xlsx");
-		assertThat(skill.version()).isEqualTo("latest");
-	}
-
-	@Test
-	void shouldCreateFromAnthropicSkillEnumWithVersion() {
-		Skill skill = AnthropicSkill.PPTX.toSkill("20251013");
-
-		assertThat(skill.type()).isEqualTo(SkillType.ANTHROPIC);
-		assertThat(skill.skillId()).isEqualTo("pptx");
-		assertThat(skill.version()).isEqualTo("20251013");
-	}
-
-	@Test
-	void shouldFailWhenSkillTypeIsNull() {
-		assertThatThrownBy(() -> Skill.builder().skillId("xlsx").build()).isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("Skill type cannot be null");
-	}
-
-	@Test
-	void shouldFailWhenSkillIdIsEmpty() {
-		assertThatThrownBy(() -> Skill.builder().type(SkillType.ANTHROPIC).skillId("").build())
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("Skill ID cannot be empty");
-	}
-
-	@Test
-	void shouldCreateContainerWithSingleSkill() {
-		SkillContainer container = SkillContainer.builder().skill("xlsx").build();
-
-		assertThat(container.skills()).hasSize(1);
-		assertThat(container.skills().get(0).skillId()).isEqualTo("xlsx");
-	}
-
-	@Test
-	void shouldCreateContainerWithMultipleSkills() {
-		SkillContainer container = SkillContainer.builder()
-			.skill(AnthropicSkill.XLSX)
-			.skill(AnthropicSkill.PPTX)
-			.skill("company-guidelines")
-			.build();
-
-		assertThat(container.skills()).hasSize(3);
-		assertThat(container.skills()).extracting(Skill::skillId).containsExactly("xlsx", "pptx", "company-guidelines");
-	}
-
-	@Test
-	void shouldEnforceMaximum8Skills() {
-		SkillContainer.SkillContainerBuilder builder = SkillContainer.builder();
-
-		// Add 9 skills
-		for (int i = 0; i < 9; i++) {
-			builder.skill("skill-" + i);
-		}
-
-		assertThatThrownBy(() -> builder.build()).isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("Maximum of 8 skills per request");
-	}
-
-	@Test
-	void shouldFailWithEmptySkillsList() {
-		assertThatThrownBy(() -> new SkillContainer(List.of())).isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("Skills list cannot be empty");
-	}
-
-	@Test
-	void shouldFailWithNullSkillsList() {
-		assertThatThrownBy(() -> new SkillContainer(null)).isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("Skills list cannot be null");
-	}
-
-	@Test
-	void shouldAllowExactly8Skills() {
-		SkillContainer.SkillContainerBuilder builder = SkillContainer.builder();
-
-		for (int i = 0; i < 8; i++) {
-			builder.skill("skill-" + i);
-		}
-
-		SkillContainer container = builder.build();
-		assertThat(container.skills()).hasSize(8);
-	}
-
-	@Test
-	void shouldGetSkillIdFromAnthropicSkillEnum() {
-		assertThat(AnthropicSkill.XLSX.getSkillId()).isEqualTo("xlsx");
-		assertThat(AnthropicSkill.PPTX.getSkillId()).isEqualTo("pptx");
-		assertThat(AnthropicSkill.DOCX.getSkillId()).isEqualTo("docx");
-		assertThat(AnthropicSkill.PDF.getSkillId()).isEqualTo("pdf");
-	}
-
-	@Test
-	void shouldGetDescriptionFromAnthropicSkillEnum() {
-		assertThat(AnthropicSkill.XLSX.getDescription()).isEqualTo("Excel spreadsheet generation");
-		assertThat(AnthropicSkill.PPTX.getDescription()).isEqualTo("PowerPoint presentation creation");
-		assertThat(AnthropicSkill.DOCX.getDescription()).isEqualTo("Word document generation");
-		assertThat(AnthropicSkill.PDF.getDescription()).isEqualTo("PDF document creation");
-	}
-
-	@Test
-	void shouldGetValueFromSkillTypeEnum() {
-		assertThat(SkillType.ANTHROPIC.getValue()).isEqualTo("anthropic");
-		assertThat(SkillType.CUSTOM.getValue()).isEqualTo("custom");
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/ChatCompletionRequestSkillsSerializationTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/ChatCompletionRequestSkillsSerializationTests.java
deleted file mode 100644
index 96e29957b92..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/ChatCompletionRequestSkillsSerializationTests.java
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-import tools.jackson.databind.json.JsonMapper;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicMessage;
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicSkill;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
-import org.springframework.ai.anthropic.api.AnthropicApi.Role;
-import org.springframework.ai.anthropic.api.AnthropicApi.Skill;
-import org.springframework.ai.anthropic.api.AnthropicApi.SkillContainer;
-import org.springframework.ai.anthropic.api.AnthropicApi.SkillType;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- * Unit tests for {@link ChatCompletionRequest} serialization with Skills.
- *
- * @author Soby Chacko
- * @since 2.0.0
- */
-class ChatCompletionRequestSkillsSerializationTests {
-
-	@Test
-	void shouldSerializeRequestWithSkills() {
-		SkillContainer container = SkillContainer.builder().skill(AnthropicSkill.XLSX).build();
-
-		AnthropicMessage message = new AnthropicMessage(List.of(new ContentBlock("Create a spreadsheet")), Role.USER);
-
-		ChatCompletionRequest request = ChatCompletionRequest.builder()
-			.model("claude-sonnet-4-5")
-			.messages(List.of(message))
-			.maxTokens(1024)
-			.container(container)
-			.build();
-
-		String json = JsonMapper.shared().writeValueAsString(request);
-
-		assertThat(json).contains("\"container\"");
-		assertThat(json).contains("\"skills\"");
-		assertThat(json).contains("\"type\":\"anthropic\"");
-		assertThat(json).contains("\"skill_id\":\"xlsx\"");
-		assertThat(json).contains("\"version\":\"latest\"");
-	}
-
-	@Test
-	void shouldSerializeMultipleSkills() throws Exception {
-		SkillContainer container = SkillContainer.builder()
-			.skill(AnthropicSkill.XLSX)
-			.skill(AnthropicSkill.PPTX, "20251013")
-			.skill("custom-skill")
-			.build();
-
-		AnthropicMessage message = new AnthropicMessage(List.of(new ContentBlock("Create documents")), Role.USER);
-
-		ChatCompletionRequest request = ChatCompletionRequest.builder()
-			.model("claude-sonnet-4-5")
-			.messages(List.of(message))
-			.maxTokens(1024)
-			.container(container)
-			.build();
-
-		String json = JsonMapper.shared().writeValueAsString(request);
-
-		assertThat(json).contains("\"xlsx\"");
-		assertThat(json).contains("\"pptx\"");
-		assertThat(json).contains("\"custom-skill\"");
-		assertThat(json).contains("\"20251013\"");
-	}
-
-	@Test
-	void shouldNotIncludeContainerWhenNull() throws Exception {
-		AnthropicMessage message = new AnthropicMessage(List.of(new ContentBlock("Simple message")), Role.USER);
-
-		ChatCompletionRequest request = ChatCompletionRequest.builder()
-			.model("claude-sonnet-4-5")
-			.messages(List.of(message))
-			.maxTokens(1024)
-			.build();
-
-		String json = JsonMapper.shared().writeValueAsString(request);
-
-		assertThat(json).doesNotContain("\"container\"");
-	}
-
-	@Test
-	void shouldSerializeRequestWithSkillsUsingBuilderSkillsMethod() throws Exception {
-		List<Skill> skills = List.of(new Skill(SkillType.ANTHROPIC, "docx", "latest"),
-				new Skill(SkillType.CUSTOM, "my-skill", "20251013"));
-
-		AnthropicMessage message = new AnthropicMessage(List.of(new ContentBlock("Create documents")), Role.USER);
-
-		ChatCompletionRequest request = ChatCompletionRequest.builder()
-			.model("claude-sonnet-4-5")
-			.messages(List.of(message))
-			.maxTokens(1024)
-			.skills(skills)
-			.build();
-
-		String json = JsonMapper.shared().writeValueAsString(request);
-
-		assertThat(json).contains("\"container\"");
-		assertThat(json).contains("\"skills\"");
-		assertThat(json).contains("\"docx\"");
-		assertThat(json).contains("\"my-skill\"");
-		assertThat(json).contains("\"20251013\"");
-	}
-
-	@Test
-	void shouldDeserializeRequestWithSkills() throws Exception {
-		String json = """
-				{
-					"model": "claude-sonnet-4-5",
-					"messages": [
-						{
-							"role": "user",
-							"content": [{"type": "text", "text": "Hello"}]
-						}
-					],
-					"max_tokens": 1024,
-					"container": {
-						"skills": [
-							{
-								"type": "anthropic",
-								"skill_id": "xlsx",
-								"version": "latest"
-							}
-						]
-					}
-				}
-				""";
-
-		ChatCompletionRequest request = JsonMapper.shared().readValue(json, ChatCompletionRequest.class);
-
-		assertThat(request.container()).isNotNull();
-		assertThat(request.container().skills()).hasSize(1);
-		assertThat(request.container().skills().get(0).type()).isEqualTo(SkillType.ANTHROPIC);
-		assertThat(request.container().skills().get(0).skillId()).isEqualTo("xlsx");
-		assertThat(request.container().skills().get(0).version()).isEqualTo("latest");
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/StreamHelperTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/StreamHelperTests.java
deleted file mode 100644
index ff6a3b43d95..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/StreamHelperTests.java
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api;
-
-import java.lang.reflect.Field;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicReference;
-
-import org.junit.jupiter.api.Test;
-
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.MessageDeltaEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.MessageStartEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.MessageStopEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.PingEvent;
-import org.springframework.ai.anthropic.api.AnthropicApi.Role;
-import org.springframework.ai.anthropic.api.AnthropicApi.Usage;
-import org.springframework.ai.anthropic.api.StreamHelper.ChatCompletionResponseBuilder;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
-
-/**
- * Tests for {@link StreamHelper}.
- *
- * @author Ilayaperumal Gopinathan
- * @author Sun Yuhan
- */
-class StreamHelperTests {
-
-	@Test
-	void testErrorEventTypeWithEmptyContentBlock() {
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-		StreamHelper streamHelper = new StreamHelper();
-
-		// Initialize content block reference with a message start event. This ensures
-		// that message id, model and content are set in the contentBlockReference
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, null, null);
-		streamHelper.eventToChatCompletionResponse(new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message),
-				contentBlockReference);
-
-		AnthropicApi.ErrorEvent errorEvent = new AnthropicApi.ErrorEvent(AnthropicApi.EventType.ERROR,
-				new AnthropicApi.ErrorEvent.Error("error", "error message"));
-
-		AnthropicApi.ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(errorEvent,
-				contentBlockReference);
-		assertThat(response).isNotNull();
-	}
-
-	@Test
-	void testMultipleErrorEventsHandling() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		// Initialize content block reference with a message start event. This ensures
-		// that message id, model and content are set in the contentBlockReference
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, null, null);
-		streamHelper.eventToChatCompletionResponse(new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message),
-				contentBlockReference);
-
-		AnthropicApi.ErrorEvent firstError = new AnthropicApi.ErrorEvent(AnthropicApi.EventType.ERROR,
-				new AnthropicApi.ErrorEvent.Error("validation_error", "Invalid input"));
-		AnthropicApi.ErrorEvent secondError = new AnthropicApi.ErrorEvent(AnthropicApi.EventType.ERROR,
-				new AnthropicApi.ErrorEvent.Error("server_error", "Internal server error"));
-
-		AnthropicApi.ChatCompletionResponse response1 = streamHelper.eventToChatCompletionResponse(firstError,
-				contentBlockReference);
-		AnthropicApi.ChatCompletionResponse response2 = streamHelper.eventToChatCompletionResponse(secondError,
-				contentBlockReference);
-
-		assertThat(response1).isNotNull();
-		assertThat(response2).isNotNull();
-	}
-
-	@Test
-	void testMessageStartEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(10, 20, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.id()).isEqualTo("msg-1");
-		assertThat(response.type()).isEqualTo("MESSAGE_START");
-		assertThat(response.role()).isEqualTo(Role.ASSISTANT);
-		assertThat(response.model()).isEqualTo("claude-haiku-4-5");
-		assertThat(response.usage().inputTokens()).isEqualTo(10);
-		assertThat(response.usage().outputTokens()).isEqualTo(20);
-	}
-
-	@Test
-	void testContentBlockStartTextEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		ContentBlockStartEvent.ContentBlockText textBlock = new ContentBlockStartEvent.ContentBlockText("text",
-				"Hello");
-		ContentBlockStartEvent textStartEvent = new ContentBlockStartEvent(AnthropicApi.EventType.CONTENT_BLOCK_START,
-				0, textBlock);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(textStartEvent,
-				contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.type()).isEqualTo("CONTENT_BLOCK_START");
-		assertThat(response.content()).hasSize(1);
-		assertThat(response.content().get(0).type()).isEqualTo(ContentBlock.Type.TEXT);
-		assertThat(response.content().get(0).text()).isEqualTo("Hello");
-	}
-
-	@Test
-	void testContentBlockDeltaTextEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		ContentBlockDeltaEvent.ContentBlockDeltaText deltaText = new ContentBlockDeltaEvent.ContentBlockDeltaText(
-				"text_delta", " world!");
-		ContentBlockDeltaEvent deltaEvent = new ContentBlockDeltaEvent(AnthropicApi.EventType.CONTENT_BLOCK_DELTA, 0,
-				deltaText);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(deltaEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.type()).isEqualTo("CONTENT_BLOCK_DELTA");
-		assertThat(response.content()).hasSize(1);
-		assertThat(response.content().get(0).type()).isEqualTo(ContentBlock.Type.TEXT_DELTA);
-		assertThat(response.content().get(0).text()).isEqualTo(" world!");
-	}
-
-	@Test
-	void testMessageStopEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		MessageStopEvent stopEvent = new MessageStopEvent(AnthropicApi.EventType.MESSAGE_STOP);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(stopEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.type()).isEqualTo("MESSAGE_STOP");
-		assertThat(response.content()).isEmpty();
-		assertThat(response.stopReason()).isNull();
-		assertThat(response.stopSequence()).isNull();
-	}
-
-	@Test
-	void testMessageDeltaEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse initialMessage = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT,
-				List.of(), "claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, initialMessage);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		MessageDeltaEvent.MessageDelta delta = new MessageDeltaEvent.MessageDelta("end_turn", null);
-		MessageDeltaEvent.MessageDeltaUsage deltaUsage = new MessageDeltaEvent.MessageDeltaUsage(15);
-		MessageDeltaEvent deltaEvent = new MessageDeltaEvent(AnthropicApi.EventType.MESSAGE_DELTA, delta, deltaUsage);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(deltaEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.type()).isEqualTo("MESSAGE_DELTA");
-		assertThat(response.stopReason()).isEqualTo("end_turn");
-		assertThat(response.usage().outputTokens()).isEqualTo(15);
-	}
-
-	@Test
-	void testPingEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		PingEvent pingEvent = new PingEvent(AnthropicApi.EventType.PING);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(pingEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.type()).isEqualTo("PING");
-		assertThat(response.content()).isEmpty();
-	}
-
-	@Test
-	void testToolUseAggregateEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		AnthropicApi.ToolUseAggregationEvent toolEvent = new AnthropicApi.ToolUseAggregationEvent();
-
-		ContentBlockStartEvent.ContentBlockToolUse toolUse = new ContentBlockStartEvent.ContentBlockToolUse("tool_use",
-				"tool-1", "calculator", Map.of("operation", "add", "x", 2, "y", 3));
-
-		try {
-			Field toolContentBlocksField = AnthropicApi.ToolUseAggregationEvent.class
-				.getDeclaredField("toolContentBlocks");
-			toolContentBlocksField.setAccessible(true);
-			@SuppressWarnings("unchecked")
-			List<ContentBlockStartEvent.ContentBlockToolUse> toolContentBlocks = (List<ContentBlockStartEvent.ContentBlockToolUse>) toolContentBlocksField
-				.get(toolEvent);
-			toolContentBlocks.add(toolUse);
-
-			Field indexField = AnthropicApi.ToolUseAggregationEvent.class.getDeclaredField("index");
-			indexField.setAccessible(true);
-			indexField.set(toolEvent, 0);
-
-			Field idField = AnthropicApi.ToolUseAggregationEvent.class.getDeclaredField("id");
-			idField.setAccessible(true);
-			idField.set(toolEvent, "tool-1");
-
-			Field nameField = AnthropicApi.ToolUseAggregationEvent.class.getDeclaredField("name");
-			nameField.setAccessible(true);
-			nameField.set(toolEvent, "calculator");
-		}
-		catch (Exception e) {
-			throw new RuntimeException(e);
-		}
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(toolEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.content()).hasSize(1);
-		assertThat(response.content().get(0).type()).isEqualTo(ContentBlock.Type.TOOL_USE);
-		assertThat(response.content().get(0).id()).isEqualTo("tool-1");
-		assertThat(response.content().get(0).name()).isEqualTo("calculator");
-		assertThat(response.content().get(0).input()).containsEntry("operation", "add");
-	}
-
-	@Test
-	void testContentBlockStartThinkingEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		ContentBlockStartEvent.ContentBlockThinking thinkingBlock = new ContentBlockStartEvent.ContentBlockThinking(
-				"thinking", "Initial thinking content", "signature123");
-		ContentBlockStartEvent thinkingStartEvent = new ContentBlockStartEvent(
-				AnthropicApi.EventType.CONTENT_BLOCK_START, 0, thinkingBlock);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(thinkingStartEvent,
-				contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.type()).isEqualTo("CONTENT_BLOCK_START");
-		assertThat(response.content()).hasSize(1);
-		assertThat(response.content().get(0).type()).isEqualTo(ContentBlock.Type.THINKING);
-		assertThat(response.content().get(0).thinking()).isEqualTo("Initial thinking content");
-		assertThat(response.content().get(0).signature()).isEqualTo("signature123");
-	}
-
-	@Test
-	void testContentBlockDeltaThinkingEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		ContentBlockDeltaEvent.ContentBlockDeltaThinking deltaThinking = new ContentBlockDeltaEvent.ContentBlockDeltaThinking(
-				"thinking_delta", "Additional thinking content");
-		ContentBlockDeltaEvent deltaEvent = new ContentBlockDeltaEvent(AnthropicApi.EventType.CONTENT_BLOCK_DELTA, 0,
-				deltaThinking);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(deltaEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.type()).isEqualTo("CONTENT_BLOCK_DELTA");
-		assertThat(response.content()).hasSize(1);
-		assertThat(response.content().get(0).type()).isEqualTo(ContentBlock.Type.THINKING_DELTA);
-		assertThat(response.content().get(0).thinking()).isEqualTo("Additional thinking content");
-	}
-
-	@Test
-	void testContentBlockDeltaSignatureEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		ContentBlockDeltaEvent.ContentBlockDeltaSignature deltaSignature = new ContentBlockDeltaEvent.ContentBlockDeltaSignature(
-				"signature_delta", "signature456");
-		ContentBlockDeltaEvent deltaEvent = new ContentBlockDeltaEvent(AnthropicApi.EventType.CONTENT_BLOCK_DELTA, 0,
-				deltaSignature);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(deltaEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-		assertThat(response.type()).isEqualTo("CONTENT_BLOCK_DELTA");
-		assertThat(response.content()).hasSize(1);
-		assertThat(response.content().get(0).type()).isEqualTo(ContentBlock.Type.SIGNATURE_DELTA);
-		assertThat(response.content().get(0).signature()).isEqualTo("signature456");
-	}
-
-	@Test
-	void testContentBlockStopEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		AnthropicApi.ContentBlockStopEvent stopEvent = new AnthropicApi.ContentBlockStopEvent(
-				AnthropicApi.EventType.CONTENT_BLOCK_STOP, 0);
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(stopEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-	}
-
-	@Test
-	void testUnsupportedContentBlockType() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		ContentBlockStartEvent.ContentBlockBody unsupportedBlock = () -> "unsupported_type";
-
-		ContentBlockStartEvent unsupportedEvent = new ContentBlockStartEvent(AnthropicApi.EventType.CONTENT_BLOCK_START,
-				0, unsupportedBlock);
-
-		assertThatThrownBy(() -> streamHelper.eventToChatCompletionResponse(unsupportedEvent, contentBlockReference))
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("Unsupported content block type");
-	}
-
-	@Test
-	void testUnsupportedContentBlockDeltaType() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		ContentBlockDeltaEvent.ContentBlockDeltaBody unsupportedDelta = () -> "unsupported_delta_type";
-
-		ContentBlockDeltaEvent unsupportedEvent = new ContentBlockDeltaEvent(AnthropicApi.EventType.CONTENT_BLOCK_DELTA,
-				0, unsupportedDelta);
-
-		assertThatThrownBy(() -> streamHelper.eventToChatCompletionResponse(unsupportedEvent, contentBlockReference))
-			.isInstanceOf(IllegalArgumentException.class)
-			.hasMessageContaining("Unsupported content block delta type");
-	}
-
-	@Test
-	void testToolUseAggregationWithEmptyToolContentBlocks() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		Usage usage = new Usage(0, 0, null, null);
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, usage, null);
-		MessageStartEvent startEvent = new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message);
-		streamHelper.eventToChatCompletionResponse(startEvent, contentBlockReference);
-
-		AnthropicApi.ToolUseAggregationEvent toolEvent = new AnthropicApi.ToolUseAggregationEvent();
-
-		try {
-			Field toolContentBlocksField = AnthropicApi.ToolUseAggregationEvent.class
-				.getDeclaredField("toolContentBlocks");
-			toolContentBlocksField.setAccessible(true);
-			@SuppressWarnings("unchecked")
-			List<ContentBlockStartEvent.ContentBlockToolUse> toolContentBlocks = (List<ContentBlockStartEvent.ContentBlockToolUse>) toolContentBlocksField
-				.get(toolEvent);
-			toolContentBlocks.clear(); // 清空列表
-
-			Field indexField = AnthropicApi.ToolUseAggregationEvent.class.getDeclaredField("index");
-			indexField.setAccessible(true);
-			indexField.set(toolEvent, null);
-		}
-		catch (Exception e) {
-			throw new RuntimeException(e);
-		}
-
-		ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(toolEvent, contentBlockReference);
-
-		assertThat(response).isNotNull();
-	}
-
-	@Test
-	void testMergeToolUseEventsWithNonToolUseAggregationEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-
-		AnthropicApi.StreamEvent previousEvent = new AnthropicApi.PingEvent(AnthropicApi.EventType.PING);
-		AnthropicApi.StreamEvent currentEvent = new AnthropicApi.PingEvent(AnthropicApi.EventType.PING);
-
-		AnthropicApi.StreamEvent result = streamHelper.mergeToolUseEvents(previousEvent, currentEvent);
-
-		assertThat(result).isEqualTo(currentEvent);
-	}
-
-	@Test
-	void testIsToolUseStartWithNullEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		assertThat(streamHelper.isToolUseStart(null)).isFalse();
-	}
-
-	@Test
-	void testIsToolUseStartWithNonContentBlockStartEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AnthropicApi.PingEvent pingEvent = new AnthropicApi.PingEvent(AnthropicApi.EventType.PING);
-		assertThat(streamHelper.isToolUseStart(pingEvent)).isFalse();
-	}
-
-	@Test
-	void testIsToolUseFinishWithNullEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		assertThat(streamHelper.isToolUseFinish(null)).isFalse();
-	}
-
-	@Test
-	void testIsToolUseFinishWithNonContentBlockStopEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AnthropicApi.PingEvent pingEvent = new AnthropicApi.PingEvent(AnthropicApi.EventType.PING);
-		assertThat(streamHelper.isToolUseFinish(pingEvent)).isFalse();
-	}
-
-	@Test
-	void testIsToolUseFinishWithContentBlockStopEvent() {
-		StreamHelper streamHelper = new StreamHelper();
-		AnthropicApi.ContentBlockStopEvent stopEvent = new AnthropicApi.ContentBlockStopEvent(
-				AnthropicApi.EventType.CONTENT_BLOCK_STOP, 0);
-		assertThat(streamHelper.isToolUseFinish(stopEvent)).isTrue();
-	}
-
-	@Test
-	void testPingEventHandling() {
-		StreamHelper streamHelper = new StreamHelper();
-		AtomicReference<ChatCompletionResponseBuilder> contentBlockReference = new AtomicReference<>();
-
-		// Initialize content block reference with a message start event. This ensures
-		// that message id, model and content are set in the contentBlockReference
-		ChatCompletionResponse message = new ChatCompletionResponse("msg-1", "message", Role.ASSISTANT, List.of(),
-				"claude-haiku-4-5", null, null, null, null);
-		streamHelper.eventToChatCompletionResponse(new MessageStartEvent(AnthropicApi.EventType.MESSAGE_START, message),
-				contentBlockReference);
-
-		AnthropicApi.PingEvent pingEvent = new AnthropicApi.PingEvent(AnthropicApi.EventType.PING);
-
-		AnthropicApi.ChatCompletionResponse response = streamHelper.eventToChatCompletionResponse(pingEvent,
-				contentBlockReference);
-
-		assertThat(response).isNotNull();
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/tool/AnthropicApiToolIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/tool/AnthropicApiToolIT.java
deleted file mode 100644
index 0d75284ce8e..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/tool/AnthropicApiToolIT.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.api.tool;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.function.Function;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicMessage;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest;
-import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
-import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock.Type;
-import org.springframework.ai.anthropic.api.AnthropicApi.Role;
-import org.springframework.ai.anthropic.api.AnthropicApi.Tool;
-import org.springframework.ai.model.ModelOptionsUtils;
-import org.springframework.http.ResponseEntity;
-import org.springframework.util.CollectionUtils;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-/**
- *
- * <a href="https://docs.anthropic.com/claude/docs/tool-use-examples">Tool use
- * examples</a> <br/>
- * <a href="https://docs.anthropic.com/claude/docs/tool-use">Tool use (function
- * calling)</a>
- *
- * @author Christian Tzolov
- * @since 1.0.0
- */
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-@SuppressWarnings("null")
-public class AnthropicApiToolIT {
-
-	public static final ConcurrentHashMap<String, Function> FUNCTIONS = new ConcurrentHashMap<>();
-
-	private static final Logger logger = LoggerFactory.getLogger(AnthropicApiToolIT.class);
-
-	AnthropicApi anthropicApi = AnthropicApi.builder().apiKey(System.getenv("ANTHROPIC_API_KEY")).build();
-
-	List<Tool> tools = List.of(new Tool("getCurrentWeather",
-			"Get the weather in location. Return temperature in 30°F or 30°C format.", ModelOptionsUtils.jsonToMap("""
-					{
-						"type": "object",
-						"properties": {
-							"location": {
-								"type": "string",
-								"description": "The city and state e.g. San Francisco, CA"
-							},
-							"unit": {
-								"type": "string",
-								"enum": ["C", "F"]
-							}
-						},
-						"required": ["location", "unit"]
-					}
-					""")));
-
-	@Test
-	void toolCalls() {
-
-		List<AnthropicMessage> messageConversation = new ArrayList<>();
-
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock(
-				"What's the weather like in San Francisco, Tokyo, and Paris? Show the temperature in Celsius.")),
-				Role.USER);
-
-		messageConversation.add(chatCompletionMessage);
-
-		ResponseEntity<ChatCompletionResponse> chatCompletion = doCall(messageConversation);
-
-		var responseText = chatCompletion.getBody().content().get(0).text();
-		logger.info("FINAL RESPONSE: " + responseText);
-
-		assertThat(responseText).contains("15");
-		assertThat(responseText).contains("10");
-		assertThat(responseText).contains("30");
-	}
-
-	private ResponseEntity<ChatCompletionResponse> doCall(List<AnthropicMessage> messageConversation) {
-
-		ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5)
-			.messages(messageConversation)
-			.maxTokens(1500)
-			.temperature(0.8)
-			.tools(this.tools)
-			.build();
-
-		ResponseEntity<ChatCompletionResponse> response = this.anthropicApi.chatCompletionEntity(chatCompletionRequest);
-
-		List<ContentBlock> toolToUseList = response.getBody()
-			.content()
-			.stream()
-			.filter(c -> c.type() == ContentBlock.Type.TOOL_USE)
-			.toList();
-
-		if (CollectionUtils.isEmpty(toolToUseList)) {
-			return response;
-		}
-		// Add use tool message to the conversation history
-		messageConversation.add(new AnthropicMessage(response.getBody().content(), Role.ASSISTANT));
-
-		List<ContentBlock> toolResults = new ArrayList<>();
-
-		for (ContentBlock toolToUse : toolToUseList) {
-
-			var id = toolToUse.id();
-			var name = toolToUse.name();
-			var input = toolToUse.input();
-
-			logger.info("FunctionCalls from the LLM: " + name);
-
-			MockWeatherService.Request request = ModelOptionsUtils.mapToClass(input, MockWeatherService.Request.class);
-
-			logger.info("Resolved function request param: " + request);
-
-			Object functionCallResponseData = FUNCTIONS.get(name).apply(request);
-
-			String content = ModelOptionsUtils.toJsonString(functionCallResponseData);
-
-			logger.info("Function response : " + content);
-
-			toolResults.add(new ContentBlock(Type.TOOL_RESULT, id, content));
-		}
-
-		// Add function response message to the conversation history
-		messageConversation.add(new AnthropicMessage(toolResults, Role.USER));
-
-		return doCall(messageConversation);
-	}
-
-	static {
-		FUNCTIONS.put("getCurrentWeather", new MockWeatherService());
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicChatModelIT.java
similarity index 54%
rename from models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java
rename to models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicChatModelIT.java
index 69b1e5d39a2..743495b602b 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicChatModelIT.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic;
+package org.springframework.ai.anthropic.chat;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -23,6 +23,12 @@
 import java.util.Map;
 import java.util.stream.Collectors;
 
+import com.anthropic.models.messages.Model;
+import com.anthropic.models.messages.OutputConfig;
+import com.anthropic.models.messages.ToolChoice;
+import com.anthropic.models.messages.ToolChoiceAny;
+import com.anthropic.models.messages.ToolChoiceNone;
+import com.anthropic.models.messages.ToolChoiceTool;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
 import org.junit.jupiter.params.ParameterizedTest;
@@ -31,17 +37,18 @@
 import org.slf4j.LoggerFactory;
 import reactor.core.publisher.Flux;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.tool.MockWeatherService;
+import org.springframework.ai.anthropic.AnthropicChatModel;
+import org.springframework.ai.anthropic.AnthropicChatOptions;
+import org.springframework.ai.anthropic.AnthropicCitationDocument;
+import org.springframework.ai.anthropic.AnthropicTestConfiguration;
+import org.springframework.ai.anthropic.Citation;
 import org.springframework.ai.chat.client.ChatClient;
 import org.springframework.ai.chat.messages.AssistantMessage;
 import org.springframework.ai.chat.messages.Message;
 import org.springframework.ai.chat.messages.UserMessage;
 import org.springframework.ai.chat.metadata.Usage;
-import org.springframework.ai.chat.model.ChatModel;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.model.Generation;
-import org.springframework.ai.chat.model.StreamingChatModel;
 import org.springframework.ai.chat.prompt.Prompt;
 import org.springframework.ai.chat.prompt.PromptTemplate;
 import org.springframework.ai.chat.prompt.SystemPromptTemplate;
@@ -49,37 +56,35 @@
 import org.springframework.ai.converter.BeanOutputConverter;
 import org.springframework.ai.converter.ListOutputConverter;
 import org.springframework.ai.converter.MapOutputConverter;
-import org.springframework.ai.model.tool.ToolCallingChatOptions;
 import org.springframework.ai.tool.function.FunctionToolCallback;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
-import org.springframework.boot.SpringBootConfiguration;
 import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.context.annotation.Bean;
 import org.springframework.core.convert.support.DefaultConversionService;
 import org.springframework.core.io.ClassPathResource;
 import org.springframework.core.io.Resource;
 import org.springframework.util.MimeType;
 import org.springframework.util.MimeTypeUtils;
-import org.springframework.util.StringUtils;
 
 import static org.assertj.core.api.Assertions.assertThat;
 
-@SpringBootTest(classes = AnthropicChatModelIT.Config.class, properties = "spring.ai.retry.on-http-codes=429")
+/**
+ * Integration tests for {@link AnthropicChatModel}.
+ *
+ * @author Soby Chacko
+ */
+@SpringBootTest(classes = AnthropicTestConfiguration.class)
 @EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
 class AnthropicChatModelIT {
 
 	private static final Logger logger = LoggerFactory.getLogger(AnthropicChatModelIT.class);
 
-	@Autowired
-	protected ChatModel chatModel;
-
-	@Autowired
-	protected StreamingChatModel streamingChatModel;
-
 	@Value("classpath:/prompts/system-message.st")
 	private Resource systemResource;
 
+	@Autowired
+	private AnthropicChatModel chatModel;
+
 	private static void validateChatResponseMetadata(ChatResponse response, String model) {
 		assertThat(response.getMetadata().getId()).isNotEmpty();
 		assertThat(response.getMetadata().getUsage().getPromptTokens()).isPositive();
@@ -88,7 +93,7 @@ private static void validateChatResponseMetadata(ChatResponse response, String m
 	}
 
 	@ParameterizedTest(name = "{0} : {displayName} ")
-	@ValueSource(strings = { "claude-haiku-4-5" })
+	@ValueSource(strings = { "claude-sonnet-4-20250514" })
 	void roleTest(String modelName) {
 		UserMessage userMessage = new UserMessage(
 				"Tell me about 3 famous pirates from the Golden Age of Piracy and why they did.");
@@ -111,38 +116,25 @@ void roleTest(String modelName) {
 
 	@Test
 	void testMessageHistory() {
-		UserMessage userMessage = new UserMessage(
+		// First turn - ask about pirates
+		UserMessage firstUserMessage = new UserMessage(
 				"Tell me about 3 famous pirates from the Golden Age of Piracy and why they did.");
 		SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(this.systemResource);
 		Message systemMessage = systemPromptTemplate.createMessage(Map.of("name", "Bob", "voice", "pirate"));
-		Prompt prompt = new Prompt(List.of(userMessage, systemMessage),
-				AnthropicChatOptions.builder().model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5).build());
+		Prompt prompt = new Prompt(List.of(systemMessage, firstUserMessage),
+				AnthropicChatOptions.builder().model(Model.CLAUDE_SONNET_4_20250514).build());
 
 		ChatResponse response = this.chatModel.call(prompt);
 		assertThat(response.getResult().getOutput().getText()).containsAnyOf("Blackbeard", "Bartholomew");
 
-		var promptWithMessageHistory = new Prompt(List.of(new UserMessage("Dummy"), response.getResult().getOutput(),
-				new UserMessage("Repeat the last assistant message.")));
+		// Second turn - include the first exchange in history, then ask to repeat
+		var promptWithMessageHistory = new Prompt(List.of(systemMessage, firstUserMessage,
+				response.getResult().getOutput(), new UserMessage("Repeat the names of the pirates you mentioned.")));
 		response = this.chatModel.call(promptWithMessageHistory);
 
 		assertThat(response.getResult().getOutput().getText()).containsAnyOf("Blackbeard", "Bartholomew");
 	}
 
-	@Test
-	void streamingWithTokenUsage() {
-		var promptOptions = AnthropicChatOptions.builder().temperature(0.0).build();
-
-		var prompt = new Prompt("List two colors of the Polish flag. Be brief.", promptOptions);
-		var streamingTokenUsage = this.chatModel.stream(prompt).blockLast().getMetadata().getUsage();
-		var referenceTokenUsage = this.chatModel.call(prompt).getMetadata().getUsage();
-
-		assertThat(streamingTokenUsage.getPromptTokens()).isGreaterThan(0);
-		assertThat(streamingTokenUsage.getCompletionTokens()).isGreaterThan(0);
-		assertThat(streamingTokenUsage.getTotalTokens()).isGreaterThan(0);
-
-		assertThat(streamingTokenUsage.getPromptTokens()).isEqualTo(referenceTokenUsage.getPromptTokens());
-	}
-
 	@Test
 	void listOutputConverter() {
 		DefaultConversionService conversionService = new DefaultConversionService();
@@ -183,12 +175,10 @@ void mapOutputConverter() {
 
 		Map<String, Object> result = mapOutputConverter.convert(generation.getOutput().getText());
 		assertThat(result.get("numbers")).isEqualTo(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9));
-
 	}
 
 	@Test
 	void beanOutputConverterRecords() {
-
 		BeanOutputConverter<ActorsFilmsRecord> beanOutputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class);
 
 		String format = beanOutputConverter.getFormat();
@@ -210,80 +200,79 @@ void beanOutputConverterRecords() {
 	}
 
 	@Test
-	void beanStreamOutputConverterRecords() {
-
-		BeanOutputConverter<ActorsFilmsRecord> beanOutputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class);
-
-		String format = beanOutputConverter.getFormat();
-		String template = """
-				Generate the filmography of 5 movies for Tom Hanks.
-				{format}
-				""";
-		PromptTemplate promptTemplate = PromptTemplate.builder()
-			.template(template)
-			.variables(Map.of("format", format))
-			.build();
-		Prompt prompt = new Prompt(promptTemplate.createMessage());
-
-		String generationTextFromStream = this.streamingChatModel.stream(prompt)
-			.collectList()
-			.block()
-			.stream()
-			.map(ChatResponse::getResults)
-			.flatMap(List::stream)
-			.map(Generation::getOutput)
-			.map(AssistantMessage::getText)
-			.collect(Collectors.joining());
+	void validateCallResponseMetadata() {
+		String model = Model.CLAUDE_SONNET_4_20250514.asString();
+		// @formatter:off
+		ChatResponse response = ChatClient.create(this.chatModel).prompt()
+				.options(AnthropicChatOptions.builder().model(model).build())
+				.user("Tell me about 3 famous pirates from the Golden Age of Piracy and what they did")
+				.call()
+				.chatResponse();
+		// @formatter:on
 
-		ActorsFilmsRecord actorsFilms = beanOutputConverter.convert(generationTextFromStream);
-		logger.info("" + actorsFilms);
-		assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks");
-		assertThat(actorsFilms.movies()).hasSize(5);
+		logger.info(response.toString());
+		validateChatResponseMetadata(response, model);
 	}
 
 	@Test
-	void multiModalityTest() throws IOException {
+	void streamingBasicTest() {
+		Prompt prompt = new Prompt("Tell me a short joke about programming.");
 
-		var imageData = new ClassPathResource("/test.png");
+		List<ChatResponse> responses = this.chatModel.stream(prompt).collectList().block();
 
-		var userMessage = UserMessage.builder()
-			.text("Explain what do you see on this picture?")
-			.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageData)))
-			.build();
+		assertThat(responses).isNotEmpty();
 
-		var response = this.chatModel.call(new Prompt(List.of(userMessage)));
+		// Concatenate all text from streaming responses
+		String fullResponse = responses.stream()
+			.filter(response -> response.getResult() != null)
+			.map(response -> response.getResult().getOutput().getText())
+			.filter(text -> text != null)
+			.collect(Collectors.joining());
 
-		logger.info(response.getResult().getOutput().getText());
-		assertThat(response.getResult().getOutput().getText()).containsAnyOf("bananas", "apple", "bowl", "basket",
-				"fruit stand");
+		assertThat(fullResponse).isNotEmpty();
+		logger.info("Streaming response: {}", fullResponse);
 	}
 
 	@Test
-	void multiModalityPdfTest() throws IOException {
+	void streamingWithTokenUsage() {
+		Prompt prompt = new Prompt("Tell me a very short joke.");
 
-		var pdfData = new ClassPathResource("/spring-ai-reference-overview.pdf");
+		List<ChatResponse> responses = this.chatModel.stream(prompt).collectList().block();
 
-		var userMessage = UserMessage.builder()
-			.text("You are a very professional document summarization specialist. Please summarize the given document.")
-			.media(List.of(new Media(new MimeType("application", "pdf"), pdfData)))
-			.build();
+		assertThat(responses).isNotEmpty();
 
-		var response = this.chatModel.call(new Prompt(List.of(userMessage),
-				ToolCallingChatOptions.builder().model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName()).build()));
+		// Find the response with usage metadata (comes from message_delta event)
+		ChatResponse lastResponseWithUsage = responses.stream()
+			.filter(response -> response.getMetadata() != null && response.getMetadata().getUsage() != null
+					&& response.getMetadata().getUsage().getTotalTokens() > 0)
+			.reduce((first, second) -> second)
+			.orElse(null);
 
-		assertThat(response.getResult().getOutput().getText()).containsAnyOf("Spring AI", "portable API");
+		assertThat(lastResponseWithUsage).isNotNull();
+
+		var usage = lastResponseWithUsage.getMetadata().getUsage();
+		logger.info("Streaming usage - Input: {}, Output: {}, Total: {}", usage.getPromptTokens(),
+				usage.getCompletionTokens(), usage.getTotalTokens());
+
+		// Verify both input and output tokens are captured
+		assertThat(usage.getPromptTokens()).as("Input tokens should be captured from message_start").isPositive();
+		assertThat(usage.getCompletionTokens()).as("Output tokens should be captured from message_delta").isPositive();
+		assertThat(usage.getTotalTokens()).isEqualTo(usage.getPromptTokens() + usage.getCompletionTokens());
+
+		// Also verify message metadata is captured
+		assertThat(lastResponseWithUsage.getMetadata().getId()).as("Message ID should be captured").isNotEmpty();
+		assertThat(lastResponseWithUsage.getMetadata().getModel()).as("Model should be captured").isNotEmpty();
 	}
 
 	@Test
 	void functionCallTest() {
-
 		UserMessage userMessage = new UserMessage(
 				"What's the weather like in San Francisco, Tokyo and Paris? Return the result in Celsius.");
 
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
+			.model(Model.CLAUDE_HAIKU_4_5.asString())
 			.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 				.description(
 						"Get the weather in location. Return temperature in 36°F or 36°C format. Use multi-turn if needed.")
@@ -301,21 +290,18 @@ void functionCallTest() {
 		assertThat(generation.getOutput().getText()).contains("30", "10", "15");
 		assertThat(response.getMetadata()).isNotNull();
 		assertThat(response.getMetadata().getUsage()).isNotNull();
-		assertThat(response.getMetadata().getUsage().getTotalTokens()).isLessThan(4000).isGreaterThan(100);
+		assertThat(response.getMetadata().getUsage().getTotalTokens()).isGreaterThan(100);
 	}
 
 	@Test
 	void streamFunctionCallTest() {
-
 		UserMessage userMessage = new UserMessage(
-				// "What's the weather like in San Francisco? Return the result in
-				// Celsius.");
 				"What's the weather like in San Francisco, Tokyo and Paris? Return the result in Celsius.");
 
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
+			.model(Model.CLAUDE_HAIKU_4_5.asString())
 			.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 				.description(
 						"Get the weather in location. Return temperature in 36°F or 36°C format. Use multi-turn if needed.")
@@ -323,29 +309,29 @@ void streamFunctionCallTest() {
 				.build())
 			.build();
 
-		Flux<ChatResponse> response = this.chatModel.stream(new Prompt(messages, promptOptions));
+		Flux<ChatResponse> responseFlux = this.chatModel.stream(new Prompt(messages, promptOptions));
 
-		String content = response.collectList()
+		String content = responseFlux.collectList()
 			.block()
 			.stream()
 			.filter(cr -> cr.getResult() != null)
 			.map(cr -> cr.getResult().getOutput().getText())
-			.collect(Collectors.joining());
+			.filter(text -> text != null)
+			.collect(Collectors.joining());
 
-		logger.info("Response: {}", content);
+		logger.info("Streaming Response: {}", content);
 		assertThat(content).contains("30", "10", "15");
 	}
 
 	@Test
 	void streamFunctionCallUsageTest() {
-
 		UserMessage userMessage = new UserMessage(
 				"What's the weather like in San Francisco, Tokyo and Paris? Return the result in Celsius.");
 
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
+			.model(Model.CLAUDE_HAIKU_4_5.asString())
 			.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 				.description(
 						"Get the weather in location. Return temperature in 36°F or 36°C format. Use multi-turn if needed.")
@@ -355,33 +341,58 @@ void streamFunctionCallUsageTest() {
 
 		Flux<ChatResponse> responseFlux = this.chatModel.stream(new Prompt(messages, promptOptions));
 
-		ChatResponse chatResponse = responseFlux.last().block();
+		ChatResponse lastResponse = responseFlux.collectList()
+			.block()
+			.stream()
+			.filter(cr -> cr.getMetadata() != null && cr.getMetadata().getUsage() != null
+					&& cr.getMetadata().getUsage().getTotalTokens() > 0)
+			.reduce((first, second) -> second)
+			.orElse(null);
 
-		logger.info("Response: {}", chatResponse);
-		Usage usage = chatResponse.getMetadata().getUsage();
+		logger.info("Streaming Response with usage: {}", lastResponse);
 
+		assertThat(lastResponse).isNotNull();
+		Usage usage = lastResponse.getMetadata().getUsage();
 		assertThat(usage).isNotNull();
-		assertThat(usage.getTotalTokens()).isLessThan(4000).isGreaterThan(1800);
+		// Tool calling uses more tokens due to multi-turn conversation
+		assertThat(usage.getTotalTokens()).isGreaterThan(100);
 	}
 
 	@Test
-	void validateCallResponseMetadata() {
-		String model = AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName();
-		// @formatter:off
-		ChatResponse response = ChatClient.create(this.chatModel).prompt()
-				.options(AnthropicChatOptions.builder().model(model).build())
-				.user("Tell me about 3 famous pirates from the Golden Age of Piracy and what they did")
-				.call()
-				.chatResponse();
-		// @formatter:on
+	void beanStreamOutputConverterRecords() {
+		BeanOutputConverter<ActorsFilmsRecord> beanOutputConverter = new BeanOutputConverter<>(ActorsFilmsRecord.class);
 
-		logger.info(response.toString());
-		validateChatResponseMetadata(response, model);
+		String format = beanOutputConverter.getFormat();
+		String template = """
+				Generate the filmography of 5 movies for Tom Hanks.
+				{format}
+				""";
+		PromptTemplate promptTemplate = PromptTemplate.builder()
+			.template(template)
+			.variables(Map.of("format", format))
+			.build();
+		Prompt prompt = new Prompt(promptTemplate.createMessage());
+
+		String generationTextFromStream = this.chatModel.stream(prompt)
+			.collectList()
+			.block()
+			.stream()
+			.map(ChatResponse::getResults)
+			.flatMap(List::stream)
+			.map(Generation::getOutput)
+			.map(AssistantMessage::getText)
+			.filter(text -> text != null)
+			.collect(Collectors.joining());
+
+		ActorsFilmsRecord actorsFilms = beanOutputConverter.convert(generationTextFromStream);
+		logger.info("" + actorsFilms);
+		assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks");
+		assertThat(actorsFilms.movies()).hasSize(5);
 	}
 
 	@Test
 	void validateStreamCallResponseMetadata() {
-		String model = AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName();
+		String model = Model.CLAUDE_SONNET_4_20250514.asString();
 		// @formatter:off
 		ChatResponse response = ChatClient.create(this.chatModel).prompt()
 				.options(AnthropicChatOptions.builder().model(model).build())
@@ -392,72 +403,7 @@ void validateStreamCallResponseMetadata() {
 		// @formatter:on
 
 		logger.info(response.toString());
-		// Note, brittle test.
-		validateChatResponseMetadata(response, "claude-haiku-4-5-latest");
-	}
-
-	@Test
-	void thinkingTest() {
-		UserMessage userMessage = new UserMessage(
-				"Are there an infinite number of prime numbers such that n mod 4 == 3?");
-
-		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
-			.temperature(1.0) // temperature should be set to 1 when thinking is enabled
-			.maxTokens(8192)
-			.thinking(AnthropicApi.ThinkingType.ENABLED, 2048) // Must be ≥1024 && <
-																// max_tokens
-			.build();
-
-		ChatResponse response = this.chatModel.call(new Prompt(List.of(userMessage), promptOptions));
-
-		logger.info("Response: {}", response);
-
-		for (Generation generation : response.getResults()) {
-			AssistantMessage message = generation.getOutput();
-			if (message.getText() != null) { // text
-				assertThat(message.getText()).isNotBlank();
-			}
-			else if (message.getMetadata().containsKey("signature")) { // thinking
-				assertThat(message.getMetadata().get("signature")).isNotNull();
-				assertThat(message.getMetadata().get("thinking")).isNotNull();
-			}
-			else if (message.getMetadata().containsKey("data")) { // redacted thinking
-				assertThat(message.getMetadata().get("data")).isNotNull();
-			}
-		}
-	}
-
-	@Test
-	void thinkingWithStreamingTest() {
-		UserMessage userMessage = new UserMessage(
-				"Are there an infinite number of prime numbers such that n mod 4 == 3?");
-
-		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_6.getName())
-			.temperature(1.0) // Temperature should be set to 1 when thinking is enabled
-			.maxTokens(8192)
-			.thinking(AnthropicApi.ThinkingType.ENABLED, 2048) // Must be ≥1024 && <
-																// max_tokens
-			.build();
-
-		Flux<ChatResponse> responseFlux = this.streamingChatModel
-			.stream(new Prompt(List.of(userMessage), promptOptions));
-
-		String content = responseFlux.collectList()
-			.block()
-			.stream()
-			.map(ChatResponse::getResults)
-			.flatMap(List::stream)
-			.map(Generation::getOutput)
-			.map(AssistantMessage::getText)
-			.filter(text -> text != null && !text.isBlank())
-			.collect(Collectors.joining());
-
-		logger.info("Response: {}", content);
-
-		assertThat(content).isNotBlank();
-		assertThat(content).contains("primes");
+		validateChatResponseMetadata(response, model);
 	}
 
 	@Test
@@ -468,7 +414,8 @@ void testToolUseContentBlock() {
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
+			.model(Model.CLAUDE_HAIKU_4_5.asString())
+			.internalToolExecutionEnabled(false)
 			.toolCallbacks(List.of(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 				.description(
 						"Get the weather in location. Return temperature in 36°F or 36°C format. Use multi-turn if needed.")
@@ -499,8 +446,8 @@ void testToolChoiceAny() {
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
-			.toolChoice(new AnthropicApi.ToolChoiceAny())
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.toolChoice(ToolChoice.ofAny(ToolChoiceAny.builder().build()))
 			.internalToolExecutionEnabled(false)
 			.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 				.description(
@@ -528,8 +475,8 @@ void testToolChoiceTool() {
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
-			.toolChoice(new AnthropicApi.ToolChoiceTool("getFunResponse", true))
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.toolChoice(ToolChoice.ofTool(ToolChoiceTool.builder().name("getFunResponse").build()))
 			.internalToolExecutionEnabled(false)
 			.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 				.description(
@@ -566,8 +513,8 @@ void testToolChoiceNone() {
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getName())
-			.toolChoice(new AnthropicApi.ToolChoiceNone())
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.toolChoice(ToolChoice.ofNone(ToolChoiceNone.builder().build()))
 			.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 				.description(
 						"Get the weather in location. Return temperature in 36°F or 36°C format. Use multi-turn if needed.")
@@ -587,31 +534,346 @@ void testToolChoiceNone() {
 		assertThat(allToolCalls).isEmpty();
 	}
 
-	record ActorsFilmsRecord(String actor, List<String> movies) {
+	@Test
+	void multiModalityTest() throws IOException {
+		var imageData = new ClassPathResource("/test.png");
 
+		var userMessage = UserMessage.builder()
+			.text("Explain what do you see on this picture?")
+			.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageData)))
+			.build();
+
+		var response = this.chatModel.call(new Prompt(List.of(userMessage)));
+
+		logger.info("Response: {}", response.getResult().getOutput().getText());
+		assertThat(response.getResult().getOutput().getText()).containsAnyOf("bananas", "apple", "bowl", "basket",
+				"fruit");
 	}
 
-	@SpringBootConfiguration
-	public static class Config {
+	@Test
+	void multiModalityPdfTest() throws IOException {
+		var pdfData = new ClassPathResource("/spring-ai-reference-overview.pdf");
 
-		@Bean
-		public AnthropicApi anthropicApi() {
-			return AnthropicApi.builder().apiKey(getApiKey()).build();
-		}
+		var userMessage = UserMessage.builder()
+			.text("You are a very professional document summarization specialist. Please summarize the given document.")
+			.media(List.of(new Media(new MimeType("application", "pdf"), pdfData)))
+			.build();
+
+		var response = this.chatModel.call(new Prompt(List.of(userMessage)));
+
+		logger.info("Response: {}", response.getResult().getOutput().getText());
+		assertThat(response.getResult().getOutput().getText()).containsAnyOf("Spring AI", "portable API");
+	}
+
+	@Test
+	void thinkingTest() {
+		UserMessage userMessage = new UserMessage(
+				"Are there an infinite number of prime numbers such that n mod 4 == 3?");
+
+		var promptOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.temperature(1.0) // temperature must be 1 when thinking is enabled
+			.maxTokens(16000)
+			.thinkingEnabled(10000L)
+			.build();
+
+		ChatResponse response = this.chatModel.call(new Prompt(List.of(userMessage), promptOptions));
 
-		private String getApiKey() {
-			String apiKey = System.getenv("ANTHROPIC_API_KEY");
-			if (!StringUtils.hasText(apiKey)) {
-				throw new IllegalArgumentException(
-						"You must provide an API key.  Put it in an environment variable under the name ANTHROPIC_API_KEY");
+		// At least two generations expected; the loop below checks each kind
+		assertThat(response.getResults()).hasSizeGreaterThanOrEqualTo(2);
+
+		for (Generation generation : response.getResults()) {
+			AssistantMessage message = generation.getOutput();
+			if (message.getText() != null && !message.getText().isBlank()) {
+				// Text block
+				assertThat(message.getText()).isNotBlank();
+			}
+			else if (message.getMetadata().containsKey("signature")) {
+				// Thinking block
+				assertThat(message.getMetadata().get("signature")).isNotNull();
+			}
+			else if (message.getMetadata().containsKey("data")) {
+				// Redacted thinking block
+				assertThat(message.getMetadata().get("data")).isNotNull();
 			}
-			return apiKey;
 		}
+	}
 
-		@Bean
-		public AnthropicChatModel openAiChatModel(AnthropicApi api) {
-			return AnthropicChatModel.builder().anthropicApi(api).build();
+	@Test
+	void thinkingWithStreamingTest() {
+		UserMessage userMessage = new UserMessage(
+				"Are there an infinite number of prime numbers such that n mod 4 == 3?");
+
+		var promptOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.temperature(1.0) // temperature must be 1 when thinking is enabled
+			.maxTokens(16000)
+			.thinkingEnabled(10000L)
+			.build();
+
+		Flux<ChatResponse> responseFlux = this.chatModel.stream(new Prompt(List.of(userMessage), promptOptions));
+
+		List<ChatResponse> responses = responseFlux.collectList().block();
+
+		// Verify we got text content
+		String content = responses.stream()
+			.map(ChatResponse::getResults)
+			.flatMap(List::stream)
+			.map(Generation::getOutput)
+			.map(AssistantMessage::getText)
+			.filter(text -> text != null && !text.isBlank())
+			.collect(Collectors.joining());
+
+		logger.info("Thinking streaming response: {}", content);
+		assertThat(content).isNotBlank();
+
+		// Verify signature was captured in the stream
+		List<AssistantMessage> outputs = responses.stream()
+			.map(ChatResponse::getResults)
+			.flatMap(List::stream)
+			.map(Generation::getOutput)
+			.toList();
+		assertThat(outputs).as("Streaming should capture the thinking block signature")
+			.anyMatch(msg -> msg.getMetadata().containsKey("signature"));
+	}
+
+	@Test
+	void testPlainTextCitation() {
+		AnthropicCitationDocument document = AnthropicCitationDocument.builder()
+			.plainText(
+					"The Eiffel Tower is located in Paris, France. It was completed in 1889 and stands 330 meters tall.")
+			.title("Eiffel Tower Facts")
+			.citationsEnabled(true)
+			.build();
+
+		UserMessage userMessage = new UserMessage(
+				"Based solely on the provided document, where is the Eiffel Tower located and when was it completed?");
+
+		AnthropicChatOptions options = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.maxTokens(2048)
+			.temperature(0.0)
+			.citationDocuments(document)
+			.build();
+
+		ChatResponse response = this.chatModel.call(new Prompt(List.of(userMessage), options));
+
+		assertThat(response).isNotNull();
+		assertThat(response.getResults()).isNotEmpty();
+		assertThat(response.getResult().getOutput().getText()).isNotBlank();
+
+		Object citationsObj = response.getMetadata().get("citations");
+		assertThat(citationsObj).as("Citations should be present in response metadata").isNotNull();
+
+		@SuppressWarnings("unchecked")
+		List<Citation> citations = (List<Citation>) citationsObj;
+		assertThat(citations).as("Citation list should not be empty").isNotEmpty();
+
+		for (Citation citation : citations) {
+			assertThat(citation.getType()).isEqualTo(Citation.LocationType.CHAR_LOCATION);
+			assertThat(citation.getCitedText()).isNotBlank();
+			assertThat(citation.getDocumentIndex()).isEqualTo(0);
+			assertThat(citation.getDocumentTitle()).isEqualTo("Eiffel Tower Facts");
+			assertThat(citation.getStartCharIndex()).isGreaterThanOrEqualTo(0);
+			assertThat(citation.getEndCharIndex()).isGreaterThan(citation.getStartCharIndex());
 		}
+	}
+
+	@Test
+	void testMultipleCitationDocuments() {
+		AnthropicCitationDocument parisDoc = AnthropicCitationDocument.builder()
+			.plainText("Paris is the capital city of France. It has a population of about 2.1 million people.")
+			.title("Paris Information")
+			.citationsEnabled(true)
+			.build();
+
+		AnthropicCitationDocument eiffelDoc = AnthropicCitationDocument.builder()
+			.plainText("The Eiffel Tower was designed by Gustave Eiffel and completed in 1889 for the World's Fair.")
+			.title("Eiffel Tower History")
+			.citationsEnabled(true)
+			.build();
+
+		UserMessage userMessage = new UserMessage(
+				"Based solely on the provided documents, what is the capital of France and who designed the Eiffel Tower?");
+
+		AnthropicChatOptions options = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.maxTokens(1024)
+			.temperature(0.0)
+			.citationDocuments(parisDoc, eiffelDoc)
+			.build();
+
+		ChatResponse response = this.chatModel.call(new Prompt(List.of(userMessage), options));
+
+		assertThat(response).isNotNull();
+		assertThat(response.getResults()).isNotEmpty();
+		assertThat(response.getResult().getOutput().getText()).isNotBlank();
+
+		Object citationsObj = response.getMetadata().get("citations");
+		assertThat(citationsObj).as("Citations should be present in response metadata").isNotNull();
+
+		@SuppressWarnings("unchecked")
+		List<Citation> citations = (List<Citation>) citationsObj;
+		assertThat(citations).as("Citation list should not be empty").isNotEmpty();
+
+		assertThat(citations).extracting(Citation::getDocumentIndex)
+			.as("Should have citations from both documents")
+			.contains(0, 1);
+
+		for (Citation citation : citations) {
+			assertThat(citation.getType()).isEqualTo(Citation.LocationType.CHAR_LOCATION);
+			assertThat(citation.getCitedText()).isNotBlank();
+			assertThat(citation.getDocumentIndex()).isIn(0, 1);
+			assertThat(citation.getDocumentTitle()).isIn("Paris Information", "Eiffel Tower History");
+			assertThat(citation.getStartCharIndex()).isGreaterThanOrEqualTo(0);
+			assertThat(citation.getEndCharIndex()).isGreaterThan(citation.getStartCharIndex());
+		}
+	}
+
+	@Test
+	void testCustomContentCitation() {
+		// One citable document built from three custom content strings.
+		AnthropicCitationDocument wallFacts = AnthropicCitationDocument.builder()
+			.customContent("The Great Wall of China is approximately 21,196 kilometers long.",
+					"It was built over many centuries, starting in the 7th century BC.",
+					"The wall was constructed to protect Chinese states from invasions.")
+			.title("Great Wall Facts")
+			.citationsEnabled(true)
+			.build();
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.maxTokens(1024)
+			.temperature(0.0)
+			.citationDocuments(wallFacts)
+			.build();
+
+		UserMessage question = new UserMessage(
+				"Based solely on the provided document, how long is the Great Wall of China and when was it started?");
+		Prompt prompt = new Prompt(List.of(question), chatOptions);
+		ChatResponse chatResponse = this.chatModel.call(prompt);
+
+		assertThat(chatResponse).isNotNull();
+		assertThat(chatResponse.getResults()).isNotEmpty();
+		assertThat(chatResponse.getResult().getOutput().getText()).isNotBlank();
+
+		// Citations are read from the response metadata under the "citations" key.
+		Object rawCitations = chatResponse.getMetadata().get("citations");
+		assertThat(rawCitations).as("Citations should be present in response metadata").isNotNull();
+		@SuppressWarnings("unchecked")
+		List<Citation> found = (List<Citation>) rawCitations;
+		assertThat(found).as("Citation list should not be empty").isNotEmpty();
+
+		found.forEach(cited -> {
+			assertThat(cited.getType()).isEqualTo(Citation.LocationType.CONTENT_BLOCK_LOCATION);
+			assertThat(cited.getCitedText()).isNotBlank();
+			assertThat(cited.getDocumentIndex()).isEqualTo(0);
+			assertThat(cited.getDocumentTitle()).isEqualTo("Great Wall Facts");
+			assertThat(cited.getStartBlockIndex()).isGreaterThanOrEqualTo(0);
+			assertThat(cited.getEndBlockIndex()).isGreaterThanOrEqualTo(cited.getStartBlockIndex());
+		});
+	}
+
+	@Test
+	void testPdfCitation() throws IOException {
+		// The PDF is loaded from a path under src/test/resources; every citation must be a page location.
+		AnthropicCitationDocument pdfDoc = AnthropicCitationDocument.builder()
+			.pdfFile("src/test/resources/spring-ai-reference-overview.pdf")
+			.title("Spring AI Reference")
+			.citationsEnabled(true)
+			.build();
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.maxTokens(1024)
+			.temperature(0.0)
+			.citationDocuments(pdfDoc)
+			.build();
+
+		UserMessage question = new UserMessage("Based solely on the provided document, what is Spring AI?");
+		Prompt prompt = new Prompt(List.of(question), chatOptions);
+		ChatResponse chatResponse = this.chatModel.call(prompt);
+
+		assertThat(chatResponse).isNotNull();
+		assertThat(chatResponse.getResults()).isNotEmpty();
+		assertThat(chatResponse.getResult().getOutput().getText()).isNotBlank();
+
+		// Citations are read from the response metadata under the "citations" key.
+		Object rawCitations = chatResponse.getMetadata().get("citations");
+		assertThat(rawCitations).as("Citations should be present for PDF documents").isNotNull();
+		@SuppressWarnings("unchecked")
+		List<Citation> found = (List<Citation>) rawCitations;
+		assertThat(found).as("Citation list should not be empty for PDF").isNotEmpty();
+
+		found.forEach(cited -> {
+			assertThat(cited.getType()).isEqualTo(Citation.LocationType.PAGE_LOCATION);
+			assertThat(cited.getCitedText()).isNotBlank();
+			assertThat(cited.getDocumentIndex()).isEqualTo(0);
+			assertThat(cited.getDocumentTitle()).isEqualTo("Spring AI Reference");
+			assertThat(cited.getStartPageNumber()).isGreaterThan(0);
+			assertThat(cited.getEndPageNumber()).isGreaterThanOrEqualTo(cited.getStartPageNumber());
+		});
+	}
+
+	@Test
+	void structuredOutputWithJsonSchema() {
+		String countrySchema = """
+				{
+					"type": "object",
+					"properties": {
+						"name": {"type": "string"},
+						"capital": {"type": "string"},
+						"population": {"type": "integer"}
+					},
+					"required": ["name", "capital"],
+					"additionalProperties": false
+				}
+				""";
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_6)
+			.outputSchema(countrySchema)
+			.build();
+
+		// The schema travels in the options; the prompt text itself only asks for JSON.
+		Prompt prompt = new Prompt("Tell me about France. Respond in JSON.", chatOptions);
+		ChatResponse chatResponse = this.chatModel.call(prompt);
+
+		assertThat(chatResponse).isNotNull();
+		String body = chatResponse.getResult().getOutput().getText();
+		assertThat(body).isNotEmpty();
+		logger.info("Structured output response: {}", body);
+		// Only the two required property names are checked, as plain substrings.
+		assertThat(body).contains("name").contains("capital");
+	}
+
+	@Test
+	void structuredOutputWithEffort() {
+		String answerSchema = """
+				{
+					"type": "object",
+					"properties": {
+						"answer": {"type": "integer"}
+					},
+					"required": ["answer"],
+					"additionalProperties": false
+				}
+				""";
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_6)
+			.outputSchema(answerSchema)
+			.effort(OutputConfig.Effort.LOW)
+			.build();
+
+		// Structured output request with the effort option set to LOW.
+		Prompt prompt = new Prompt("What is 2+2? Return the result as JSON with an 'answer' field.", chatOptions);
+		ChatResponse chatResponse = this.chatModel.call(prompt);
+
+		assertThat(chatResponse).isNotNull();
+		String body = chatResponse.getResult().getOutput().getText();
+		assertThat(body).isNotEmpty();
+		logger.info("Structured output with effort response: {}", body);
+		assertThat(body).contains("answer");
+	}
+
+	record ActorsFilmsRecord(String actor, List<String> movies) {
 
 	}
 
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelObservationIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicChatModelObservationIT.java
similarity index 81%
rename from models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelObservationIT.java
rename to models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicChatModelObservationIT.java
index c14611c7357..13535f531de 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelObservationIT.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicChatModelObservationIT.java
@@ -14,11 +14,12 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic;
+package org.springframework.ai.anthropic.chat;
 
 import java.util.List;
 import java.util.stream.Collectors;
 
+import com.anthropic.models.messages.Model;
 import io.micrometer.observation.tck.TestObservationRegistry;
 import io.micrometer.observation.tck.TestObservationRegistryAssert;
 import org.junit.jupiter.api.BeforeEach;
@@ -26,35 +27,34 @@
 import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
 import reactor.core.publisher.Flux;
 
-import org.springframework.ai.anthropic.api.AnthropicApi;
+import org.springframework.ai.anthropic.AnthropicChatModel;
+import org.springframework.ai.anthropic.AnthropicChatOptions;
 import org.springframework.ai.chat.metadata.ChatResponseMetadata;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.observation.ChatModelObservationDocumentation.HighCardinalityKeyNames;
 import org.springframework.ai.chat.observation.ChatModelObservationDocumentation.LowCardinalityKeyNames;
 import org.springframework.ai.chat.observation.DefaultChatModelObservationConvention;
 import org.springframework.ai.chat.prompt.Prompt;
-import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.observation.conventions.AiOperationType;
 import org.springframework.ai.observation.conventions.AiProvider;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.SpringBootConfiguration;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.context.annotation.Bean;
-import org.springframework.core.retry.RetryTemplate;
 
 import static org.assertj.core.api.Assertions.assertThat;
 
 /**
  * Integration tests for observation instrumentation in {@link AnthropicChatModel}.
  *
- * @author Thomas Vitale
- * @author Alexandros Pappas
+ * @author Soby Chacko
  */
-@SpringBootTest(classes = AnthropicChatModelObservationIT.Config.class,
-		properties = "spring.ai.retry.on-http-codes=429")
+@SpringBootTest(classes = AnthropicChatModelObservationIT.Config.class)
 @EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
 public class AnthropicChatModelObservationIT {
 
+	private static final String TEST_MODEL = Model.CLAUDE_HAIKU_4_5.asString();
+
 	@Autowired
 	TestObservationRegistry observationRegistry;
 
@@ -69,7 +69,7 @@ void beforeEach() {
 	@Test
 	void observationForChatOperation() {
 		var options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
+			.model(TEST_MODEL)
 			.maxTokens(2048)
 			.stopSequences(List.of("this-is-the-end"))
 			.temperature(0.7)
@@ -84,13 +84,13 @@ void observationForChatOperation() {
 		ChatResponseMetadata responseMetadata = chatResponse.getMetadata();
 		assertThat(responseMetadata).isNotNull();
 
-		validate(responseMetadata, "[\"end_turn\"]");
+		validate(responseMetadata);
 	}
 
 	@Test
 	void observationForStreamingChatOperation() {
 		var options = AnthropicChatOptions.builder()
-			.model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
+			.model(TEST_MODEL)
 			.maxTokens(2048)
 			.stopSequences(List.of("this-is-the-end"))
 			.temperature(0.7)
@@ -117,20 +117,19 @@ void observationForStreamingChatOperation() {
 		ChatResponseMetadata responseMetadata = lastChatResponse.getMetadata();
 		assertThat(responseMetadata).isNotNull();
 
-		validate(responseMetadata, "[\"end_turn\"]");
+		validate(responseMetadata);
 	}
 
-	private void validate(ChatResponseMetadata responseMetadata, String finishReasons) {
+	private void validate(ChatResponseMetadata responseMetadata) {
 		TestObservationRegistryAssert.assertThat(this.observationRegistry)
 			.doesNotHaveAnyRemainingCurrentObservation()
 			.hasObservationWithNameEqualTo(DefaultChatModelObservationConvention.DEFAULT_NAME)
 			.that()
-			.hasContextualNameEqualTo("chat " + AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
+			.hasContextualNameEqualTo("chat " + TEST_MODEL)
 			.hasLowCardinalityKeyValue(LowCardinalityKeyNames.AI_OPERATION_TYPE.asString(),
 					AiOperationType.CHAT.value())
 			.hasLowCardinalityKeyValue(LowCardinalityKeyNames.AI_PROVIDER.asString(), AiProvider.ANTHROPIC.value())
-			.hasLowCardinalityKeyValue(LowCardinalityKeyNames.REQUEST_MODEL.asString(),
-					AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5.getValue())
+			.hasLowCardinalityKeyValue(LowCardinalityKeyNames.REQUEST_MODEL.asString(), TEST_MODEL)
 			.hasLowCardinalityKeyValue(LowCardinalityKeyNames.RESPONSE_MODEL.asString(), responseMetadata.getModel())
 			.doesNotHaveHighCardinalityKeyValueWithKey(HighCardinalityKeyNames.REQUEST_FREQUENCY_PENALTY.asString())
 			.hasHighCardinalityKeyValue(HighCardinalityKeyNames.REQUEST_MAX_TOKENS.asString(), "2048")
@@ -140,7 +139,6 @@ private void validate(ChatResponseMetadata responseMetadata, String finishReason
 			.hasHighCardinalityKeyValue(HighCardinalityKeyNames.REQUEST_TEMPERATURE.asString(), "0.7")
 			.hasHighCardinalityKeyValue(HighCardinalityKeyNames.REQUEST_TOP_K.asString(), "1")
 			.hasHighCardinalityKeyValue(HighCardinalityKeyNames.RESPONSE_ID.asString(), responseMetadata.getId())
-			.hasHighCardinalityKeyValue(HighCardinalityKeyNames.RESPONSE_FINISH_REASONS.asString(), finishReasons)
 			.hasHighCardinalityKeyValue(HighCardinalityKeyNames.USAGE_INPUT_TOKENS.asString(),
 					String.valueOf(responseMetadata.getUsage().getPromptTokens()))
 			.hasHighCardinalityKeyValue(HighCardinalityKeyNames.USAGE_OUTPUT_TOKENS.asString(),
@@ -160,15 +158,11 @@ public TestObservationRegistry observationRegistry() {
 		}
 
 		@Bean
-		public AnthropicApi anthropicApi() {
-			return AnthropicApi.builder().apiKey(System.getenv("ANTHROPIC_API_KEY")).build();
-		}
-
-		@Bean
-		public AnthropicChatModel anthropicChatModel(AnthropicApi anthropicApi,
-				TestObservationRegistry observationRegistry) {
-			return new AnthropicChatModel(anthropicApi, AnthropicChatOptions.builder().build(),
-					ToolCallingManager.builder().build(), new RetryTemplate(), observationRegistry);
+		public AnthropicChatModel anthropicSdkChatModel(TestObservationRegistry observationRegistry) {
+			return AnthropicChatModel.builder()
+				.options(AnthropicChatOptions.builder().build())
+				.observationRegistry(observationRegistry)
+				.build();
 		}
 
 	}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicPromptCachingIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicPromptCachingIT.java
new file mode 100644
index 00000000000..9d021ddd04d
--- /dev/null
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/AnthropicPromptCachingIT.java
@@ -0,0 +1,403 @@
+/*
+ * Copyright 2023-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic.chat;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.anthropic.models.messages.Model;
+import com.anthropic.models.messages.Usage;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.springframework.ai.anthropic.AnthropicCacheOptions;
+import org.springframework.ai.anthropic.AnthropicCacheStrategy;
+import org.springframework.ai.anthropic.AnthropicCacheTtl;
+import org.springframework.ai.anthropic.AnthropicChatModel;
+import org.springframework.ai.anthropic.AnthropicChatOptions;
+import org.springframework.ai.anthropic.AnthropicTestConfiguration;
+import org.springframework.ai.chat.messages.Message;
+import org.springframework.ai.chat.messages.MessageType;
+import org.springframework.ai.chat.messages.SystemMessage;
+import org.springframework.ai.chat.messages.UserMessage;
+import org.springframework.ai.chat.model.ChatResponse;
+import org.springframework.ai.chat.prompt.Prompt;
+import org.springframework.ai.tool.function.FunctionToolCallback;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.core.io.Resource;
+import org.springframework.core.io.ResourceLoader;
+import org.springframework.util.StreamUtils;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Integration tests for Anthropic prompt caching functionality using the Anthropic Java
+ * SDK.
+ *
+ * @author Soby Chacko
+ */
+@SpringBootTest(classes = AnthropicTestConfiguration.class)
+@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
+class AnthropicPromptCachingIT {
+
+	private static final Logger logger = LoggerFactory.getLogger(AnthropicPromptCachingIT.class);
+
+	@Autowired
+	private AnthropicChatModel chatModel;
+
+	@Autowired
+	private ResourceLoader resourceLoader;
+
+	private String loadPrompt(String filename) { // returns prompt text plus a per-run timestamp line
+		Resource resource = this.resourceLoader.getResource("classpath:prompts/" + filename);
+		try (var in = resource.getInputStream()) { // copyToString leaves the stream open; close it here
+			String basePrompt = StreamUtils.copyToString(in, StandardCharsets.UTF_8);
+			return basePrompt + "\n\nTest execution timestamp: " + System.currentTimeMillis();
+		}
+		catch (IOException e) {
+			throw new RuntimeException("Failed to load prompt: " + filename, e);
+		}
+	}
+
+	private Usage getSdkUsage(ChatResponse response) {
+		// Null-safe: a missing response, metadata, or usage yields null, as does a non-SDK native usage.
+		boolean usable = response != null && response.getMetadata() != null
+				&& response.getMetadata().getUsage() != null;
+		Object candidate = usable ? response.getMetadata().getUsage().getNativeUsage() : null;
+		return (candidate instanceof Usage sdkUsage) ? sdkUsage : null;
+	}
+
+	@Test
+	void shouldCacheSystemMessageOnly() {
+		// SYSTEM_ONLY strategy: the usage must report cache creation or cache read tokens.
+		AnthropicCacheOptions caching = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+			.build();
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.cacheOptions(caching)
+			.maxTokens(150)
+			.temperature(0.3)
+			.build();
+		List<Message> messages = List.of(new SystemMessage(loadPrompt("system-only-cache-prompt.txt")),
+				new UserMessage("What is microservices architecture?"));
+		ChatResponse chatResponse = this.chatModel.call(new Prompt(messages, chatOptions));
+
+		assertThat(chatResponse).isNotNull();
+		String answer = chatResponse.getResult().getOutput().getText();
+		assertThat(answer).isNotEmpty();
+		logger.info("System-only cache response: {}", answer);
+
+		Usage sdkUsage = getSdkUsage(chatResponse);
+		assertThat(sdkUsage).isNotNull();
+		long created = sdkUsage.cacheCreationInputTokens().orElse(0L);
+		long read = sdkUsage.cacheReadInputTokens().orElse(0L);
+		assertThat(created > 0 || read > 0)
+			.withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d",
+					created, read)
+			.isTrue();
+		logger.info("Cache creation tokens: {}, Cache read tokens: {}", created, read);
+	}
+
+	@Test
+	void shouldCacheSystemAndTools() {
+		// SYSTEM_AND_TOOLS strategy with a single tool callback registered on the request options.
+		MockWeatherService weather = new MockWeatherService();
+		AnthropicCacheOptions caching = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
+			.build();
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.cacheOptions(caching)
+			.maxTokens(200)
+			.temperature(0.3)
+			.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", weather)
+				.description("Get current weather for a location")
+				.inputType(MockWeatherService.Request.class)
+				.build())
+			.build();
+		List<Message> messages = List.of(new SystemMessage(loadPrompt("system-and-tools-cache-prompt.txt")),
+				new UserMessage("What's the weather like in San Francisco and should I go for a walk?"));
+
+		ChatResponse chatResponse = this.chatModel.call(new Prompt(messages, chatOptions));
+
+		assertThat(chatResponse).isNotNull();
+		String answer = chatResponse.getResult().getOutput().getText();
+		assertThat(answer).isNotEmpty();
+		logger.info("System and tools cache response: {}", answer);
+
+		Usage sdkUsage = getSdkUsage(chatResponse);
+		if (sdkUsage == null) {
+			// Without the SDK usage object the cache counters cannot be checked; only the text is re-checked.
+			logger.debug("Native usage metadata not available for tool-based interactions - this is expected");
+			assertThat(answer).isNotEmpty();
+			return;
+		}
+
+		long created = sdkUsage.cacheCreationInputTokens().orElse(0L);
+		long read = sdkUsage.cacheReadInputTokens().orElse(0L);
+		assertThat(created > 0 || read > 0)
+			.withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d",
+					created, read)
+			.isTrue();
+		logger.info("Cache creation tokens: {}, Cache read tokens: {}", created, read);
+	}
+
+	@Test
+	void shouldCacheConversationHistory() {
+		// CONVERSATION_HISTORY strategy; the USER minimum content length is set to 0.
+		AnthropicCacheOptions caching = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
+			.messageTypeMinContentLength(MessageType.USER, 0)
+			.build();
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.cacheOptions(caching)
+			.maxTokens(200)
+			.temperature(0.3)
+			.build();
+
+		List<Message> history = new ArrayList<>();
+		history.add(new SystemMessage(loadPrompt("system-only-cache-prompt.txt")));
+
+		// First turn: send the opening question and keep the assistant reply in the history.
+		history.add(new UserMessage("What is quantum computing? Please explain the basics."));
+		ChatResponse firstTurn = this.chatModel.call(new Prompt(history, chatOptions));
+		assertThat(firstTurn).isNotNull();
+		history.add(firstTurn.getResult().getOutput());
+
+		Usage firstUsage = getSdkUsage(firstTurn);
+		assertThat(firstUsage).isNotNull();
+		long createdInFirstTurn = firstUsage.cacheCreationInputTokens().orElse(0L);
+		long readInFirstTurn = firstUsage.cacheReadInputTokens().orElse(0L);
+		logger.info("Turn 1 - Cache creation: {}, Cache read: {}", createdInFirstTurn, readInFirstTurn);
+
+		// Second turn: follow-up question sent on top of the accumulated history.
+		history.add(new UserMessage("How does quantum entanglement work?"));
+		ChatResponse secondTurn = this.chatModel.call(new Prompt(history, chatOptions));
+		assertThat(secondTurn).isNotNull();
+		history.add(secondTurn.getResult().getOutput());
+
+		Usage secondUsage = getSdkUsage(secondTurn);
+		assertThat(secondUsage).isNotNull();
+		long createdInSecondTurn = secondUsage.cacheCreationInputTokens().orElse(0L);
+		long readInSecondTurn = secondUsage.cacheReadInputTokens().orElse(0L);
+		logger.info("Turn 2 - Cache creation: {}, Cache read: {}", createdInSecondTurn, readInSecondTurn);
+
+		// The cache-read expectation only applies when turn 1 reported cache creation.
+		if (createdInFirstTurn > 0) {
+			assertThat(readInSecondTurn).as("Turn 2 should read cache from Turn 1").isGreaterThan(0);
+		}
+	}
+
+	@Test
+	void shouldRespectMinLengthForSystemCaching() {
+		String systemText = loadPrompt("system-only-cache-prompt.txt");
+		// The SYSTEM threshold is one character longer than the prompt, so the prompt falls below it.
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.cacheOptions(AnthropicCacheOptions.builder()
+				.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+				.messageTypeMinContentLength(MessageType.SYSTEM, systemText.length() + 1)
+				.build())
+			.maxTokens(60)
+			.temperature(0.2)
+			.build();
+		List<Message> messages = List.of(new SystemMessage(systemText), new UserMessage("Ping"));
+
+		ChatResponse chatResponse = this.chatModel.call(new Prompt(messages, chatOptions));
+
+		assertThat(chatResponse).isNotNull();
+		Usage sdkUsage = getSdkUsage(chatResponse);
+		assertThat(sdkUsage).isNotNull();
+		assertThat(sdkUsage.cacheCreationInputTokens().orElse(0L)).as("No cache should be created below min length")
+			.isZero();
+		assertThat(sdkUsage.cacheReadInputTokens().orElse(0L)).as("No cache read expected below min length").isZero();
+	}
+
+	@Test
+	void shouldHandleExtendedTtlCaching() {
+		// SYSTEM_ONLY strategy with the SYSTEM message TTL set to ONE_HOUR.
+		AnthropicCacheOptions caching = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
+			.build();
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.cacheOptions(caching)
+			.maxTokens(100)
+			.temperature(0.3)
+			.build();
+		List<Message> messages = List.of(new SystemMessage(loadPrompt("extended-ttl-cache-prompt.txt")),
+				new UserMessage("What is 2+2?"));
+		ChatResponse chatResponse = this.chatModel.call(new Prompt(messages, chatOptions));
+
+		assertThat(chatResponse).isNotNull();
+		String answer = chatResponse.getResult().getOutput().getText();
+		assertThat(answer).contains("4");
+		logger.info("Extended TTL cache response: {}", answer);
+
+		Usage sdkUsage = getSdkUsage(chatResponse);
+		assertThat(sdkUsage).isNotNull();
+		long created = sdkUsage.cacheCreationInputTokens().orElse(0L);
+		long read = sdkUsage.cacheReadInputTokens().orElse(0L);
+		assertThat(created > 0 || read > 0)
+			.withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d",
+					created, read)
+			.isTrue();
+		logger.info("Extended TTL - Cache creation: {}, Cache read: {}", created, read);
+	}
+
+	@Test
+	void shouldNotCacheWithNoneStrategy() {
+		// NONE strategy: both cache counters in the usage must be zero.
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build())
+			.maxTokens(50)
+			.temperature(0.3)
+			.build();
+		List<Message> messages = List.of(new SystemMessage("You are a helpful assistant."), new UserMessage("Hello!"));
+		ChatResponse chatResponse = this.chatModel.call(new Prompt(messages, chatOptions));
+
+		assertThat(chatResponse).isNotNull();
+		assertThat(chatResponse.getResult().getOutput().getText()).isNotEmpty();
+
+		Usage sdkUsage = getSdkUsage(chatResponse);
+		assertThat(sdkUsage).isNotNull();
+		assertThat(sdkUsage.cacheCreationInputTokens().orElse(0L)).isZero();
+		assertThat(sdkUsage.cacheReadInputTokens().orElse(0L)).isZero();
+	}
+
+	@Test
+	void shouldDemonstrateIncrementalCachingAcrossMultipleTurns() {
+		// Four turns over one growing history. Once any turn reports cache creation,
+		// every later turn is expected to report cache reads.
+		AnthropicCacheOptions caching = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
+			.messageTypeMinContentLength(MessageType.USER, 0)
+			.build();
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.cacheOptions(caching)
+			.maxTokens(200)
+			.temperature(0.3)
+			.build();
+
+		List<Message> history = new ArrayList<>();
+		history.add(new SystemMessage(loadPrompt("system-only-cache-prompt.txt")));
+
+		// Turn 1: only records whether a cache was created.
+		history.add(new UserMessage("What is quantum computing? Please explain the basics."));
+		ChatResponse firstTurn = this.chatModel.call(new Prompt(history, chatOptions));
+		assertThat(firstTurn).isNotNull();
+		history.add(firstTurn.getResult().getOutput());
+
+		Usage firstUsage = getSdkUsage(firstTurn);
+		assertThat(firstUsage).isNotNull();
+		boolean cacheExists = firstUsage.cacheCreationInputTokens().orElse(0L) > 0;
+
+		// Turn 2
+		history.add(new UserMessage("How does quantum entanglement work in this context?"));
+		ChatResponse secondTurn = this.chatModel.call(new Prompt(history, chatOptions));
+		assertThat(secondTurn).isNotNull();
+		history.add(secondTurn.getResult().getOutput());
+
+		Usage secondUsage = getSdkUsage(secondTurn);
+		assertThat(secondUsage).isNotNull();
+		if (cacheExists) {
+			assertThat(secondUsage.cacheReadInputTokens().orElse(0L)).as("Turn 2 should read cache from Turn 1")
+				.isGreaterThan(0);
+		}
+		// A cache created in this turn also counts as "started" for the turns that follow.
+		cacheExists = cacheExists || secondUsage.cacheCreationInputTokens().orElse(0L) > 0;
+
+		// Turn 3
+		history.add(new UserMessage("Can you give me a practical example of quantum computing application?"));
+		ChatResponse thirdTurn = this.chatModel.call(new Prompt(history, chatOptions));
+		assertThat(thirdTurn).isNotNull();
+		history.add(thirdTurn.getResult().getOutput());
+
+		Usage thirdUsage = getSdkUsage(thirdTurn);
+		assertThat(thirdUsage).isNotNull();
+		if (cacheExists) {
+			assertThat(thirdUsage.cacheReadInputTokens().orElse(0L)).as("Turn 3 should read cache").isGreaterThan(0);
+		}
+		cacheExists = cacheExists || thirdUsage.cacheCreationInputTokens().orElse(0L) > 0;
+
+		// Turn 4: the reply is not appended to the history because no further turn follows.
+		history.add(new UserMessage("What are the limitations of current quantum computers?"));
+		ChatResponse fourthTurn = this.chatModel.call(new Prompt(history, chatOptions));
+		assertThat(fourthTurn).isNotNull();
+
+		Usage fourthUsage = getSdkUsage(fourthTurn);
+		assertThat(fourthUsage).isNotNull();
+		assertThat(cacheExists).as("Caching should have started by turn 4").isTrue();
+		// The assertion above already guarantees a cache exists, so the read check needs no guard.
+		assertThat(fourthUsage.cacheReadInputTokens().orElse(0L)).as("Turn 4 should read cache").isGreaterThan(0);
+
+		// Summary
+		logger.info("Turn 1 - Created: {}, Read: {}", firstUsage.cacheCreationInputTokens().orElse(0L),
+				firstUsage.cacheReadInputTokens().orElse(0L));
+		logger.info("Turn 2 - Created: {}, Read: {}", secondUsage.cacheCreationInputTokens().orElse(0L),
+				secondUsage.cacheReadInputTokens().orElse(0L));
+		logger.info("Turn 3 - Created: {}, Read: {}", thirdUsage.cacheCreationInputTokens().orElse(0L),
+				thirdUsage.cacheReadInputTokens().orElse(0L));
+		logger.info("Turn 4 - Created: {}, Read: {}", fourthUsage.cacheCreationInputTokens().orElse(0L),
+				fourthUsage.cacheReadInputTokens().orElse(0L));
+	}
+
+	@Test
+	void shouldCacheStaticPrefixWithMultiBlockSystemCaching() {
+		// Two system messages are sent: the loaded prompt first, then a per-run session line.
+		SystemMessage staticPart = new SystemMessage(loadPrompt("system-only-cache-prompt.txt"));
+		SystemMessage dynamicPart = new SystemMessage("Current user session ID: " + System.currentTimeMillis());
+		AnthropicCacheOptions caching = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+			.multiBlockSystemCaching(true)
+			.build();
+		AnthropicChatOptions chatOptions = AnthropicChatOptions.builder()
+			.model(Model.CLAUDE_SONNET_4_20250514.asString())
+			.cacheOptions(caching)
+			.maxTokens(150)
+			.temperature(0.3)
+			.build();
+		List<Message> messages = List.of(staticPart, dynamicPart,
+				new UserMessage("What is microservices architecture?"));
+		ChatResponse chatResponse = this.chatModel.call(new Prompt(messages, chatOptions));
+
+		assertThat(chatResponse).isNotNull();
+		String answer = chatResponse.getResult().getOutput().getText();
+		assertThat(answer).isNotEmpty();
+		logger.info("Multi-block system cache response: {}", answer);
+
+		Usage sdkUsage = getSdkUsage(chatResponse);
+		assertThat(sdkUsage).isNotNull();
+		long created = sdkUsage.cacheCreationInputTokens().orElse(0L);
+		long read = sdkUsage.cacheReadInputTokens().orElse(0L);
+		assertThat(created > 0 || read > 0)
+			.withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d",
+					created, read)
+			.isTrue();
+		logger.info("Multi-block - Cache creation: {}, Cache read: {}", created, read);
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/tool/MockWeatherService.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/MockWeatherService.java
similarity index 94%
rename from models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/tool/MockWeatherService.java
rename to models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/MockWeatherService.java
index 10f81c7d276..046c88a1ef2 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/tool/MockWeatherService.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/MockWeatherService.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package org.springframework.ai.anthropic.api.tool;
+package org.springframework.ai.anthropic.chat;
 
 import java.util.function.Function;
 
@@ -25,7 +25,9 @@
 import com.fasterxml.jackson.annotation.JsonPropertyDescription;
 
 /**
- * @author Christian Tzolov
+ * Mock weather service for testing tool calling functionality.
+ *
+ * @author Soby Chacko
  */
 public class MockWeatherService implements Function<MockWeatherService.Request, MockWeatherService.Response> {
 
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientIT.java
deleted file mode 100644
index 68536c6961f..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientIT.java
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.client;
-
-import java.io.IOException;
-import java.net.URL;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import reactor.core.publisher.Flux;
-
-import org.springframework.ai.anthropic.AnthropicChatOptions;
-import org.springframework.ai.anthropic.AnthropicTestConfiguration;
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.anthropic.api.tool.MockWeatherService;
-import org.springframework.ai.chat.client.AdvisorParams;
-import org.springframework.ai.chat.client.ChatClient;
-import org.springframework.ai.chat.client.advisor.SimpleLoggerAdvisor;
-import org.springframework.ai.chat.model.ChatModel;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.converter.BeanOutputConverter;
-import org.springframework.ai.converter.ListOutputConverter;
-import org.springframework.ai.model.tool.ToolCallingChatOptions;
-import org.springframework.ai.test.CurlyBracketEscaper;
-import org.springframework.ai.tool.annotation.Tool;
-import org.springframework.ai.tool.function.FunctionToolCallback;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.core.ParameterizedTypeReference;
-import org.springframework.core.convert.support.DefaultConversionService;
-import org.springframework.core.io.ClassPathResource;
-import org.springframework.core.io.Resource;
-import org.springframework.test.context.ActiveProfiles;
-import org.springframework.util.MimeTypeUtils;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-@SpringBootTest(classes = AnthropicTestConfiguration.class, properties = "spring.ai.retry.on-http-codes=429")
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-@ActiveProfiles("logging-test")
-class AnthropicChatClientIT {
-
-	private static final Logger logger = LoggerFactory.getLogger(AnthropicChatClientIT.class);
-
-	@Autowired
-	ChatModel chatModel;
-
-	@Value("classpath:/prompts/system-message.st")
-	private Resource systemTextResource;
-
-	@Test
-	void call() {
-
-		// @formatter:off
-		ChatResponse response = ChatClient.create(this.chatModel).prompt()
-				.advisors(new SimpleLoggerAdvisor())
-				.system(s -> s.text(this.systemTextResource)
-						.param("name", "Bob")
-						.param("voice", "pirate"))
-				.user("Tell me about 3 famous pirates from the Golden Age of Piracy and what they did")
-				.call()
-				.chatResponse();
-		// @formatter:on
-
-		logger.info("" + response);
-		assertThat(response.getResults()).hasSize(1);
-		assertThat(response.getResults().get(0).getOutput().getText()).contains("Blackbeard");
-	}
-
-	@Test
-	void listOutputConverterString() {
-		// @formatter:off
-		List<String> collection = ChatClient.create(this.chatModel).prompt()
-				.user(u -> u.text("List five {subject}")
-						.param("subject", "ice cream flavors"))
-				.call()
-				.entity(new ParameterizedTypeReference<>() { });
-		// @formatter:on
-
-		logger.info(collection.toString());
-		assertThat(collection).hasSize(5);
-	}
-
-	@Test
-	void listOutputConverterBean() {
-
-		// @formatter:off
-		List<ActorsFilms> actorsFilms = ChatClient.create(this.chatModel).prompt()
-				.user("Generate the filmography of 5 movies for Tom Hanks and Bill Murray.")
-				.call()
-				.entity(new ParameterizedTypeReference<>() {
-				});
-		// @formatter:on
-
-		logger.info("" + actorsFilms);
-		assertThat(actorsFilms).hasSize(2);
-	}
-
-	@Test
-	void listOutputConverterBean2() {
-
-		// @formatter:off
-		List<ActorsFilms> actorsFilms = ChatClient.create(this.chatModel).prompt()
-				.advisors(AdvisorParams.ENABLE_NATIVE_STRUCTURED_OUTPUT)
-				.options(AnthropicChatOptions.builder()
-					.model(AnthropicApi.ChatModel.CLAUDE_SONNET_4_5)
-					.build())
-				.user("Generate the filmography of 5 movies for Tom Hanks and Bill Murray.")
-				.call()
-				.entity(new ParameterizedTypeReference<>() {
-				});
-		// @formatter:on
-
-		logger.info("" + actorsFilms);
-		assertThat(actorsFilms).hasSize(2);
-	}
-
-	@Test
-	void customOutputConverter() {
-
-		var toStringListConverter = new ListOutputConverter(new DefaultConversionService());
-
-		// @formatter:off
-		List<String> flavors = ChatClient.create(this.chatModel).prompt()
-				.user(u -> u.text("List five {subject}")
-				.param("subject", "ice cream flavors"))
-				.call()
-				.entity(toStringListConverter);
-		// @formatter:on
-
-		logger.info("ice cream flavors" + flavors);
-		assertThat(flavors).hasSize(5);
-		assertThat(flavors).containsAnyOf("Vanilla", "vanilla");
-	}
-
-	@Test
-	void mapOutputConverter() {
-		// @formatter:off
-		Map<String, Object> result = ChatClient.create(this.chatModel).prompt()
-				.user(u -> u.text("Provide me a List of {subject}")
-						.param("subject", "an array of numbers from 1 to 9 under they key name 'numbers'"))
-				.call()
-				.entity(new ParameterizedTypeReference<>() {
-				});
-		// @formatter:on
-
-		assertThat(result.get("numbers")).isEqualTo(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9));
-	}
-
-	@Test
-	void beanOutputConverter() {
-
-		// @formatter:off
-		ActorsFilms actorsFilms = ChatClient.create(this.chatModel).prompt()
-				.user("Generate the filmography for a random actor.")
-				.call()
-				.entity(ActorsFilms.class);
-		// @formatter:on
-
-		logger.info("" + actorsFilms);
-		assertThat(actorsFilms.actor()).isNotBlank();
-	}
-
-	@Test
-	void beanOutputConverterRecords() {
-
-		// @formatter:off
-		ActorsFilms actorsFilms = ChatClient.create(this.chatModel).prompt()
-				.user("Generate the filmography of 5 movies for Tom Hanks.")
-				.call()
-				.entity(ActorsFilms.class);
-		// @formatter:on
-
-		logger.info("" + actorsFilms);
-		assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks");
-		assertThat(actorsFilms.movies()).hasSize(5);
-	}
-
-	@Test
-	void beanStreamOutputConverterRecords() {
-
-		BeanOutputConverter<ActorsFilms> outputConverter = new BeanOutputConverter<>(ActorsFilms.class);
-
-		// @formatter:off
-		Flux<String> chatResponse = ChatClient.create(this.chatModel)
-				.prompt()
-				.advisors(new SimpleLoggerAdvisor())
-				.user(u -> u
-						.text("Generate the filmography of 5 movies for Tom Hanks. " + System.lineSeparator()
-								+ "{format}")
-						.param("format", CurlyBracketEscaper.escapeCurlyBrackets(outputConverter.getFormat())))
-				.stream()
-				.content();
-
-		String generationTextFromStream = chatResponse.collectList()
-				.block()
-				.stream()
-				.collect(Collectors.joining());
-		// @formatter:on
-
-		ActorsFilms actorsFilms = outputConverter.convert(generationTextFromStream);
-
-		logger.info("" + actorsFilms);
-		assertThat(actorsFilms.actor()).isEqualTo("Tom Hanks");
-		assertThat(actorsFilms.movies()).hasSize(5);
-	}
-
-	@Test
-	void functionCallTest() {
-
-		// @formatter:off
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco (California, USA), Tokyo (Japan), and Paris (France)? Use Celsius.")
-				.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
-					.inputType(MockWeatherService.Request.class)
-					.build())
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(response).contains("30", "10", "15");
-	}
-
-	@Test
-	void functionCallWithGeneratedDescription() {
-
-		// @formatter:off
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco, Tokyo, and Paris?  Use Celsius.")
-				.toolCallbacks(FunctionToolCallback.builder("getCurrentWeatherInLocation", new MockWeatherService())
-					.inputType(MockWeatherService.Request.class)
-					.build())
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(response).contains("30", "10", "15");
-	}
-
-	@Test
-	void defaultFunctionCallTest() {
-
-		// @formatter:off
-		String response = ChatClient.builder(this.chatModel)
-				.defaultToolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
-					.description("Get the weather in location")
-					.inputType(MockWeatherService.Request.class)
-					.build())
-				.defaultUser(u -> u.text("What's the weather like in San Francisco, Tokyo, and Paris? Use Celsius."))
-				.build()
-			.prompt()
-			.call()
-			.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(response).contains("30", "10", "15");
-	}
-
-	@Test
-	void streamFunctionCallTest() {
-
-		// @formatter:off
-		Flux<String> response = ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco, Tokyo, and Paris? Use Celsius.")
-				.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
-					.description("Get the weather in location")
-					.inputType(MockWeatherService.Request.class)
-					.build())
-				.stream()
-				.content();
-		// @formatter:on
-
-		String content = response.collectList().block().stream().collect(Collectors.joining());
-		logger.info("Response: {}", content);
-
-		assertThat(content).contains("30", "10", "15");
-	}
-
-	@ParameterizedTest(name = "{0} : {displayName} ")
-	@ValueSource(strings = { "claude-haiku-4-5", "claude-sonnet-4-0" })
-	void multiModalityEmbeddedImage(String modelName) throws IOException {
-
-		// @formatter:off
-		String response = ChatClient.create(this.chatModel).prompt()
-				.options(AnthropicChatOptions.builder().model(modelName).build())
-				.user(u -> u.text("Explain what do you see on this picture?")
-						.media(MimeTypeUtils.IMAGE_PNG, new ClassPathResource("/test.png")))
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info(response);
-		assertThat(response).containsAnyOf("bananas", "apple", "bowl", "basket", "fruit stand");
-	}
-
-	@ParameterizedTest(name = "{0} : {displayName} ")
-	@ValueSource(strings = { "claude-haiku-4-5", "claude-sonnet-4-0" })
-	void multiModalityImageUrl(String modelName) throws IOException {
-
-		// TODO: add url method that wraps the checked exception.
-		URL url = new URL("https://docs.spring.io/spring-ai/reference/_images/multimodal.test.png");
-
-		// @formatter:off
-		String response = ChatClient.create(this.chatModel).prompt()
-				// TODO consider adding model(...) method to ChatClient as a shortcut to
-				.options(AnthropicChatOptions.builder().model(modelName).build())
-				.user(u -> u.text("Explain what do you see on this picture?").media(MimeTypeUtils.IMAGE_PNG, url))
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info(response);
-		assertThat(response).containsAnyOf("bananas", "apple", "bowl", "basket", "fruit stand");
-	}
-
-	@Test
-	void streamingMultiModality() throws IOException {
-
-		// @formatter:off
-		Flux<String> response = ChatClient.create(this.chatModel).prompt()
-				.options(AnthropicChatOptions.builder().model(AnthropicApi.ChatModel.CLAUDE_HAIKU_4_5)
-						.build())
-				.user(u -> u.text("Explain what do you see on this picture?")
-						.media(MimeTypeUtils.IMAGE_PNG, new ClassPathResource("/test.png")))
-				.stream()
-				.content();
-		// @formatter:on
-
-		String content = response.collectList().block().stream().collect(Collectors.joining());
-
-		logger.info("Response: {}", content);
-		assertThat(content).containsAnyOf("bananas", "apple", "bowl", "basket", "fruit stand");
-	}
-
-	@ParameterizedTest(name = "{0} : {displayName} ")
-	@ValueSource(strings = { "claude-haiku-4-5", "claude-sonnet-4-0" })
-	void streamToolCallingResponseShouldNotContainToolCallMessages(String modelName) {
-
-		ChatClient chatClient = ChatClient.builder(this.chatModel).build();
-
-		Flux<ChatResponse> responses = chatClient.prompt()
-			.options(ToolCallingChatOptions.builder().model(modelName).build())
-			.tools(new MyTools())
-			.user("Get current weather in Amsterdam and Paris")
-			// .user("Get current weather in Amsterdam. Please don't explain that you will
-			// call tools.")
-			.stream()
-			.chatResponse();
-
-		List<ChatResponse> chatResponses = responses.collectList().block();
-
-		assertThat(chatResponses).isNotEmpty();
-
-		// Verify that none of the ChatResponse objects have tool calls
-		chatResponses.forEach(chatResponse -> {
-			logger.info("ChatResponse Results: {}", chatResponse.getResults());
-			assertThat(chatResponse.hasToolCalls()).isFalse();
-		});
-	}
-
-	public static class MyTools {
-
-		@Tool(description = "Get the current weather forecast by city name")
-		String getCurrentDateTime(String cityName) {
-			return "For " + cityName + " Weather is hot and sunny with a temperature of 20 degrees";
-		}
-
-	}
-
-	record ActorsFilms(String actor, List<String> movies) {
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientMethodInvokingFunctionCallbackIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientMethodInvokingFunctionCallbackIT.java
deleted file mode 100644
index 160d7cb9942..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientMethodInvokingFunctionCallbackIT.java
+++ /dev/null
@@ -1,379 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.client;
-
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.stream.Collectors;
-
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import reactor.core.publisher.Flux;
-
-import org.springframework.ai.anthropic.AnthropicTestConfiguration;
-import org.springframework.ai.chat.client.ChatClient;
-import org.springframework.ai.chat.model.ChatModel;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.chat.model.ToolContext;
-import org.springframework.ai.model.tool.ToolCallingChatOptions;
-import org.springframework.ai.tool.annotation.Tool;
-import org.springframework.ai.tool.method.MethodToolCallback;
-import org.springframework.ai.tool.support.ToolDefinitions;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.test.context.ActiveProfiles;
-import org.springframework.util.ReflectionUtils;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
-
-@SpringBootTest(classes = AnthropicTestConfiguration.class, properties = "spring.ai.retry.on-http-codes=429")
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-@ActiveProfiles("logging-test")
-@SuppressWarnings("null")
-class AnthropicChatClientMethodInvokingFunctionCallbackIT {
-
-	private static final Logger logger = LoggerFactory
-		.getLogger(AnthropicChatClientMethodInvokingFunctionCallbackIT.class);
-
-	public static Map<String, Object> arguments = new ConcurrentHashMap<>();
-
-	@Autowired
-	ChatModel chatModel;
-
-	@BeforeEach
-	void beforeEach() {
-		arguments.clear();
-	}
-
-	@Test
-	void methodGetWeatherGeneratedDescription() {
-
-		// @formatter:off
-		var toolMethod = ReflectionUtils.findMethod(
-			TestFunctionClass.class, "getWeatherInLocation", String.class, Unit.class);
-
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco, Tokyo, and Paris?  Use Celsius.")
-				.toolCallbacks(MethodToolCallback.builder()
-					.toolDefinition(ToolDefinitions.builder(toolMethod).build())
-					.toolMethod(toolMethod)
-					.build())
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(response).contains("30", "10", "15");
-	}
-
-	@Test
-	void methodGetWeatherStatic() {
-
-		// @formatter:off
-		var toolMethod = ReflectionUtils.findMethod(
-			TestFunctionClass.class, "getWeatherStatic", String.class, Unit.class);
-
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco, Tokyo, and Paris?  Use Celsius.")
-				.toolCallbacks(MethodToolCallback.builder()
-					.toolDefinition(ToolDefinitions.builder(toolMethod)
-						.description("Get the weather in location")
-						.build())
-					.toolMethod(toolMethod)
-					.build())
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(response).contains("30", "10", "15");
-	}
-
-	@Test
-	void methodTurnLightNoResponse() {
-
-		TestFunctionClass targetObject = new TestFunctionClass();
-
-		// @formatter:off
-
-		var turnLightMethod = ReflectionUtils.findMethod(
-			TestFunctionClass.class, "turnLight", String.class, boolean.class);
-
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("Turn light on in the living room.")
-				.toolCallbacks(MethodToolCallback.builder()
-					.toolDefinition(ToolDefinitions.builder(turnLightMethod)
-						.description("Turn light on in the living room.")
-						.build())
-					.toolMethod(turnLightMethod)
-					.toolObject(targetObject)
-					.build())
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(arguments).containsEntry("roomName", "living room");
-		assertThat(arguments).containsEntry("on", true);
-	}
-
-	@Test
-	void methodGetWeatherNonStatic() {
-
-		TestFunctionClass targetObject = new TestFunctionClass();
-
-		// @formatter:off
-		var toolMethod = ReflectionUtils.findMethod(
-			TestFunctionClass.class, "getWeatherNonStatic", String.class, Unit.class);
-
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco, Tokyo, and Paris?  Use Celsius.")
-				.toolCallbacks(MethodToolCallback.builder()
-					.toolDefinition(ToolDefinitions.builder(toolMethod)
-						.description("Get the weather in location")
-						.build())
-					.toolMethod(toolMethod)
-					.toolObject(targetObject)
-					.build())
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(response).contains("30", "10", "15");
-	}
-
-	@Test
-	void methodGetWeatherToolContext() {
-
-		TestFunctionClass targetObject = new TestFunctionClass();
-
-		// @formatter:off
-		var toolMethod = ReflectionUtils.findMethod(
-			TestFunctionClass.class, "getWeatherWithContext", String.class, Unit.class, ToolContext.class);
-
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco, Tokyo, and Paris?  Use Celsius.")
-				.toolCallbacks(MethodToolCallback.builder()
-					.toolDefinition(ToolDefinitions.builder(toolMethod)
-						.description("Get the weather in location")
-						.build())
-					.toolMethod(toolMethod)
-					.toolObject(targetObject)
-					.build())
-				.toolContext(Map.of("tool", "value"))
-				.call()
-				.content();
-
-		logger.info("Response: {}", response);
-
-		assertThat(response).contains("30", "10", "15");
-		assertThat(arguments).containsEntry("tool", "value");
-		// TOOL_CALL_HISTORY is no longer automatically added to ToolContext
-		assertThat(arguments).doesNotContainKey("TOOL_CALL_HISTORY");
-	}
-
-	@Test
-	void methodGetWeatherWithContextMethodButMissingContext() {
-
-		TestFunctionClass targetObject = new TestFunctionClass();
-
-		// @formatter:off
-		var toolMethod = ReflectionUtils.findMethod(
-			TestFunctionClass.class, "getWeatherWithContext", String.class, Unit.class, ToolContext.class);
-
-		assertThatThrownBy(() -> ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco, Tokyo, and Paris?  Use Celsius.")
-				.toolCallbacks(MethodToolCallback.builder()
-					.toolDefinition(ToolDefinitions.builder(toolMethod)
-						.description("Get the weather in location")
-						.build())
-					.toolMethod(toolMethod)
-					.toolObject(targetObject)
-					.build())
-				.call()
-				.content())
-				.isInstanceOf(IllegalArgumentException.class)
-				.hasMessage("ToolContext is required by the method as an argument");
-		// @formatter:on
-	}
-
-	@Test
-	void methodNoParameters() {
-
-		TestFunctionClass targetObject = new TestFunctionClass();
-
-		// @formatter:off
-		var toolMethod = ReflectionUtils.findMethod(
-			TestFunctionClass.class, "turnLivingRoomLightOn");
-
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("Turn light on in the living room.")
-				.toolCallbacks(MethodToolCallback.builder()
-					.toolMethod(toolMethod)
-					.toolDefinition(ToolDefinitions.builder(toolMethod)
-						.description("Can turn lights on in the Living Room")
-						.build())
-					.toolObject(targetObject)
-					.build())
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(arguments).containsEntry("turnLivingRoomLightOn", true);
-	}
-
-	@Test
-	void toolAnnotation() {
-
-		TestFunctionClass targetObject = new TestFunctionClass();
-
-		// @formatter:off
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("Turn light red in the living room.")
-				.tools(targetObject)
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(arguments).containsEntry("roomName", "living room")
-			.containsEntry("color", TestFunctionClass.LightColor.RED);
-	}
-
-	// https://github.com/spring-projects/spring-ai/issues/1878
-	@ParameterizedTest
-	@ValueSource(strings = { "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-haiku-4-5" })
-	void streamingParameterLessTool(String modelName) {
-
-		ChatClient chatClient = ChatClient.builder(this.chatModel).build();
-
-		Flux<ChatResponse> responses = chatClient.prompt()
-			.options(ToolCallingChatOptions.builder().model(modelName).build())
-			.tools(new ParameterLessTools())
-			.user("Get current weather in Amsterdam")
-			.stream()
-			.chatResponse();
-
-		String content = responses.collectList()
-			.block()
-			.stream()
-			.filter(cr -> cr.getResult() != null)
-			.map(cr -> cr.getResult().getOutput().getText())
-			.collect(Collectors.joining());
-
-		assertThat(content).contains("20");
-	}
-
-	public static class ParameterLessTools {
-
-		@Tool(description = "Get the current weather forecast in Amsterdam")
-		String getCurrentDateTime() {
-			return "Weather is hot and sunny with a temperature of 20 degrees";
-		}
-
-	}
-
-	record MyRecord(String foo, String bar) {
-	}
-
-	public enum Unit {
-
-		CELSIUS, FAHRENHEIT
-
-	}
-
-	public static class TestFunctionClass {
-
-		public static void argumentLessReturnVoid() {
-			arguments.put("method called", "argumentLessReturnVoid");
-		}
-
-		public static String getWeatherInLocation(String city, Unit unit) {
-			return getWeatherStatic(city, unit);
-		}
-
-		public static String getWeatherStatic(String city, Unit unit) {
-
-			logger.info("City: " + city + " Unit: " + unit);
-
-			arguments.put("city", city);
-			arguments.put("unit", unit);
-
-			double temperature = 0;
-			if (city.contains("Paris")) {
-				temperature = 15;
-			}
-			else if (city.contains("Tokyo")) {
-				temperature = 10;
-			}
-			else if (city.contains("San Francisco")) {
-				temperature = 30;
-			}
-
-			return "temperature: " + temperature + " unit: " + unit;
-		}
-
-		public String getWeatherNonStatic(String city, Unit unit) {
-			return getWeatherStatic(city, unit);
-		}
-
-		public String getWeatherWithContext(String city, Unit unit, ToolContext context) {
-			arguments.put("tool", context.getContext().get("tool"));
-			// TOOL_CALL_HISTORY no longer available - removed
-			return getWeatherStatic(city, unit);
-		}
-
-		public void turnLight(String roomName, boolean on) {
-			arguments.put("roomName", roomName);
-			arguments.put("on", on);
-			logger.info("Turn light in room: {} to: {}", roomName, on);
-		}
-
-		public void turnLivingRoomLightOn() {
-			arguments.put("turnLivingRoomLightOn", true);
-		}
-
-		enum LightColor {
-
-			RED, GREEN, BLUE
-
-		}
-
-		@Tool(description = "Change the lamp color in a room.")
-		public void changeRoomLightColor(String roomName, LightColor color) {
-			arguments.put("roomName", roomName);
-			arguments.put("color", color);
-			logger.info("Change light color in room: {} to color: {}", roomName, color);
-		}
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicToolCallAdvisorIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicToolCallAdvisorIT.java
deleted file mode 100644
index 512415f8ece..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicToolCallAdvisorIT.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.client;
-
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-
-import org.springframework.ai.anthropic.AnthropicChatModel;
-import org.springframework.ai.anthropic.api.AnthropicApi;
-import org.springframework.ai.chat.client.advisor.ToolCallAdvisor;
-import org.springframework.ai.chat.model.ChatModel;
-import org.springframework.ai.test.chat.client.advisor.AbstractToolCallAdvisorIT;
-import org.springframework.boot.SpringBootConfiguration;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.util.StringUtils;
-
-/**
- * Integration tests for {@link ToolCallAdvisor} functionality with Anthropic SDK.
- *
- * @author Christian Tzolov
- */
-@SpringBootTest
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-class AnthropicToolCallAdvisorIT extends AbstractToolCallAdvisorIT {
-
-	@Override
-	protected ChatModel getChatModel() {
-		AnthropicApi api = AnthropicApi.builder().apiKey(getApiKey()).build();
-		return AnthropicChatModel.builder().anthropicApi(api).build();
-	}
-
-	private String getApiKey() {
-		String apiKey = System.getenv("ANTHROPIC_API_KEY");
-		if (!StringUtils.hasText(apiKey)) {
-			throw new IllegalArgumentException(
-					"You must provide an API key.  Put it in an environment variable under the name ANTHROPIC_API_KEY");
-		}
-		return apiKey;
-	}
-
-	@SpringBootConfiguration
-	public static class TestConfiguration {
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/ChatClientToolsWithGenericArgumentTypesIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/ChatClientToolsWithGenericArgumentTypesIT.java
deleted file mode 100644
index 8389ddf85d9..00000000000
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/ChatClientToolsWithGenericArgumentTypesIT.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright 2023-present the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.springframework.ai.anthropic.client;
-
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicLong;
-
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.springframework.ai.anthropic.AnthropicTestConfiguration;
-import org.springframework.ai.chat.client.ChatClient;
-import org.springframework.ai.chat.model.ChatModel;
-import org.springframework.ai.tool.annotation.Tool;
-import org.springframework.ai.tool.annotation.ToolParam;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-@SpringBootTest(classes = AnthropicTestConfiguration.class)
-@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
-class ChatClientToolsWithGenericArgumentTypesIT {
-
-	private static final Logger logger = LoggerFactory.getLogger(ChatClientToolsWithGenericArgumentTypesIT.class);
-
-	public static Map<String, Object> arguments = new ConcurrentHashMap<>();
-
-	public static AtomicLong callCounter = new AtomicLong(0);
-
-	@BeforeEach
-	void beforeEach() {
-		arguments.clear();
-		callCounter.set(0);
-	}
-
-	@Autowired
-	ChatModel chatModel;
-
-	@Test
-	void toolWithGenericArgumentTypes() {
-		// @formatter:off
-		String response = ChatClient.create(this.chatModel).prompt()
-				.user("Turn light red in the living room and the kitchen. Please group the rooms with the same color in a single tool call.")
-				.tools(new TestToolProvider())
-				.call()
-				.content();
-		// @formatter:on
-
-		logger.info("Response: {}", response);
-
-		assertThat(arguments).containsEntry("living room", LightColor.RED);
-		assertThat(arguments).containsEntry("kitchen", LightColor.RED);
-
-		assertThat(callCounter.get()).isEqualTo(1);
-	}
-
-	record Room(String name) {
-	}
-
-	enum LightColor {
-
-		RED, GREEN, BLUE
-
-	}
-
-	public static class TestToolProvider {
-
-		@Tool(description = "Change the lamp color in a room.")
-		public void changeRoomLightColor(
-				@ToolParam(description = "List of rooms to change the light color for") List<Room> rooms,
-				@ToolParam(description = "light color to change to") LightColor color) {
-
-			logger.info("Change light color in rooms: {} to color: {}", rooms, color);
-
-			for (Room room : rooms) {
-				arguments.put(room.name(), color);
-			}
-			callCounter.incrementAndGet();
-		}
-
-	}
-
-}
diff --git a/models/spring-ai-anthropic/src/test/resources/application-logging-test.properties b/models/spring-ai-anthropic/src/test/resources/application-logging-test.properties
deleted file mode 100644
index d224ff65fac..00000000000
--- a/models/spring-ai-anthropic/src/test/resources/application-logging-test.properties
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-# Copyright 2023-present the original author or authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-logging.level.org.springframework.ai.chat.client.advisor=DEBUG
-
-logging.level.org.springframework.ai.anthropic.api.AnthropicApi=INFO
diff --git a/models/spring-ai-anthropic/src/test/resources/prompts/system-message.st b/models/spring-ai-anthropic/src/test/resources/prompts/system-message.st
index dd95164675f..1416de9c2ca 100644
--- a/models/spring-ai-anthropic/src/test/resources/prompts/system-message.st
+++ b/models/spring-ai-anthropic/src/test/resources/prompts/system-message.st
@@ -1,4 +1,4 @@
 You are a helpful AI assistant. Your name is {name}.
 You are an AI assistant that helps people find information.
 Your name is {name}
-You should reply to the user's request with your name and also in the style of a {voice}.
\ No newline at end of file
+You should reply to the user's request with your name and also in the style of a {voice}.
diff --git a/models/spring-ai-anthropic/src/test/resources/sample_events.json b/models/spring-ai-anthropic/src/test/resources/sample_events.json
deleted file mode 100644
index b7727b02d5d..00000000000
--- a/models/spring-ai-anthropic/src/test/resources/sample_events.json
+++ /dev/null
@@ -1,243 +0,0 @@
-[
-    {
-        "type": "message_start",
-        "message": {
-            "id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
-            "type": "message",
-            "role": "assistant",
-            "content": [],
-            "model": "claude-3-5-sonnet-20241022",
-            "stop_reason": null,
-            "stop_sequence": null,
-            "usage": {
-                "input_tokens": 25,
-                "output_tokens": 1
-            }
-        }
-    },
-    {
-        "type": "content_block_start",
-        "index": 0,
-        "content_block": {
-            "type": "text",
-            "text": ""
-        }
-    },
-    {
-        "type": "ping"
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": "Okay"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": ","
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": " let"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": "'s"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": " check"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": " the"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": " weather"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": " for"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": " San"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": " Francisco"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": ","
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": " CA"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 0,
-        "delta": {
-            "type": "text_delta",
-            "text": ":"
-        }
-    },
-    {
-        "type": "content_block_stop",
-        "index": 0
-    },
-    {
-        "type": "content_block_start",
-        "index": 1,
-        "content_block": {
-            "type": "tool_use",
-            "id": "toolu_01T1x1fJ34qAmk2tNTrN7Up6",
-            "name": "get_weather",
-            "input": {}
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": ""
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": "{\"location\":"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": " \"San"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": " Francisc"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": "o,"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": " CA\""
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": ", "
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": "\"unit\": \"fah"
-        }
-    },
-    {
-        "type": "content_block_delta",
-        "index": 1,
-        "delta": {
-            "type": "input_json_delta",
-            "partial_json": "renheit\"}"
-        }
-    },
-    {
-        "type": "content_block_stop",
-        "index": 1
-    },
-    {
-        "type": "message_delta",
-        "delta": {
-            "stop_reason": "end_turn",
-            "stop_sequence": null
-        },
-        "usage": {
-            "output_tokens": 15
-        }
-    },
-    {
-        "type": "message_stop"
-    },
-    {
-        "type": "error",
-        "error": {
-            "type": "overloaded_error",
-            "message": "Overloaded"
-        }
-    }
-]
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 57d6aa9a1d3..e60ae9ca45f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -292,6 +292,7 @@
 		<azure-open-ai-client.version>1.0.0-beta.16</azure-open-ai-client.version>
 		<openai-sdk.version>4.17.0</openai-sdk.version>
 		<azure-identity.version>1.18.2</azure-identity.version>
+		<anthropic-sdk.version>2.16.1</anthropic-sdk.version>
 		<jtokkit.version>1.1.0</jtokkit.version>
 		<kotlin.version>2.2.21</kotlin.version>
 
@@ -957,7 +958,7 @@
 							</includes>
 
 							<dependenciesToScan>
-								<dependency>org.springframework.ai:spring-ai-anthropic</dependency>
+								<dependency>org.springframework.ai:spring-ai-anthropic-legacy</dependency>
 							</dependenciesToScan> -->
 						</configuration>
 						<executions>
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
index 44d0500ed92..4bc317505b2 100644
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
@@ -1,52 +1,10 @@
 = Anthropic Chat
 
-link:https://www.anthropic.com/[Anthropic Claude] is a family of foundational AI models that can be used in a variety of applications.
-For developers and businesses, you can leverage the API access and build directly on top of link:https://www.anthropic.com/api[Anthropic's AI infrastructure].
-
-Spring AI supports the Anthropic link:https://docs.anthropic.com/claude/reference/messages_post[Messaging API] for sync and streaming text generations.
-
-TIP: Anthropic's Claude models are also available through Amazon Bedrock Converse.
-Spring AI provides dedicated xref:api/chat/bedrock-converse.adoc[Amazon Bedrock Converse Anthropic] client implementations as well.
+Spring AI supports Anthropic's Claude models through the official link:https://github.com/anthropics/anthropic-sdk-java[Anthropic Java SDK], providing access to Claude through Anthropic's API.
 
 == Prerequisites
 
-You will need to create an API key on the Anthropic portal.
-
-Create an account at https://console.anthropic.com/dashboard[Anthropic API dashboard] and generate the API key on the https://console.anthropic.com/settings/keys[Get API Keys] page.
-
-The Spring AI project defines a configuration property named `spring.ai.anthropic.api-key` that you should set to the value of the `API Key` obtained from anthropic.com.
-
-You can set this configuration property in your `application.properties` file:
-
-[source,properties]
-----
-spring.ai.anthropic.api-key=<your-anthropic-api-key>
-----
-
-For enhanced security when handling sensitive information like API keys, you can use Spring Expression Language (SpEL) to reference a custom environment variable:
-
-[source,yaml]
-----
-# In application.yml
-spring:
-  ai:
-    anthropic:
-      api-key: ${ANTHROPIC_API_KEY}
-----
-
-[source,bash]
-----
-# In your environment or .env file
-export ANTHROPIC_API_KEY=<your-anthropic-api-key>
-----
-
-You can also get this configuration programmatically in your application code:
-
-[source,java]
-----
-// Retrieve API key from a secure source or environment variable
-String apiKey = System.getenv("ANTHROPIC_API_KEY");
-----
+Create an account at the https://console.anthropic.com/[Anthropic Console] and generate an API key on the https://console.anthropic.com/settings/keys[API Keys page].
 
 === Add Repositories and BOM
 
@@ -55,22 +13,11 @@ Refer to the xref:getting-started.adoc#artifact-repositories[Artifact Repositori
 
 To help with dependency management, Spring AI provides a BOM (bill of materials) to ensure that a consistent version of Spring AI is used throughout the entire project. Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build system.
 
+== Auto-Configuration
 
-== Auto-configuration
-
-[NOTE]
-====
-There has been a significant change in the Spring AI auto-configuration, starter modules' artifact names.
-Please refer to the https://docs.spring.io/spring-ai/reference/upgrade-notes.html[upgrade notes] for more information.
-====
-
-Spring AI provides Spring Boot auto-configuration for the Anthropic Chat Client.
-To enable it add the following dependency to your project's Maven `pom.xml` or Gradle `build.gradle` file:
+Spring Boot auto-configuration is available via the `spring-ai-starter-model-anthropic` starter.
+Add it to your project's Maven `pom.xml` file:
 
-[tabs]
-======
-Maven::
-+
 [source, xml]
 ----
 <dependency>
@@ -79,101 +26,111 @@ Maven::
 </dependency>
 ----
 
-Gradle::
-+
+or to your Gradle `build.gradle` build file:
+
 [source,groovy]
 ----
 dependencies {
     implementation 'org.springframework.ai:spring-ai-starter-model-anthropic'
 }
 ----
-======
 
 TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
 
-=== Chat Properties
-
-==== Retry Properties
+=== Configuration Properties
 
-The prefix `spring.ai.retry` is used as the property prefix that lets you configure the retry mechanism for the Anthropic chat model.
+Use the `spring.ai.anthropic.*` properties to configure the Anthropic connection and chat options:
 
-[cols="3,5,1", stripes=even]
+[cols="3,5,1"]
 |====
 | Property | Description | Default
 
-| spring.ai.retry.max-attempts   | Maximum number of retry attempts. |  10
-| spring.ai.retry.backoff.initial-interval | Initial sleep duration for the exponential backoff policy. |  2 sec.
-| spring.ai.retry.backoff.multiplier | Backoff interval multiplier. |  5
-| spring.ai.retry.backoff.max-interval | Maximum backoff duration. |  3 min.
-| spring.ai.retry.on-client-errors | If false, throw a NonTransientAiException, and do not attempt retry for `4xx` client error codes | false
-| spring.ai.retry.exclude-on-http-codes | List of HTTP status codes that should NOT trigger a retry (e.g. to throw NonTransientAiException). | empty
-| spring.ai.retry.on-http-codes | List of HTTP status codes that should trigger a retry (e.g. to throw TransientAiException). | empty
+| `spring.ai.anthropic.api-key` | Anthropic API key | -
+| `spring.ai.anthropic.base-url` | API base URL | `https://api.anthropic.com`
+| `spring.ai.anthropic.chat.options.model` | Model name | `claude-haiku-4-5`
+| `spring.ai.anthropic.chat.options.max-tokens` | Maximum tokens | `4096`
+| `spring.ai.anthropic.chat.options.temperature` | Sampling temperature | -
+| `spring.ai.anthropic.chat.options.top-p` | Top-p sampling | -
+| `spring.ai.anthropic.chat.options.top-k` | Top-k sampling | -
 |====
 
-NOTE: currently the retry policies are not applicable for the streaming API.
+== Manual Configuration
 
-==== Connection Properties
+The https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java[AnthropicChatModel] implements the `ChatModel` interface and uses the official Anthropic Java SDK to connect to Claude.
 
-The prefix `spring.ai.anthropic` is used as the property prefix that lets you connect to Anthropic.
+Add the `spring-ai-anthropic` dependency to your project's Maven `pom.xml` file:
 
-[cols="3,5,1", stripes=even]
-|====
-| Property | Description | Default
+[source, xml]
+----
+<dependency>
+    <groupId>org.springframework.ai</groupId>
+    <artifactId>spring-ai-anthropic</artifactId>
+</dependency>
+----
 
-| spring.ai.anthropic.base-url   | The URL to connect to |  https://api.anthropic.com
-| spring.ai.anthropic.completions-path   | The path to append to the base URL. |  `/v1/chat/completions`
-| spring.ai.anthropic.version   | Anthropic API version |  2023-06-01
-| spring.ai.anthropic.api-key    | The API Key           |  -
-| spring.ai.anthropic.beta-version | Enables new/experimental features. If set to `max-tokens-3-5-sonnet-2024-07-15`
-the output tokens limit is increased from `4096` to `8192` tokens (for claude-3-5-sonnet only). | `tools-2024-04-04`
-|====
+or to your Gradle `build.gradle` build file:
+
+[source,groovy]
+----
+dependencies {
+    implementation 'org.springframework.ai:spring-ai-anthropic'
+}
+----
 
-==== Configuration Properties
+TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
 
-[NOTE]
-====
-Enabling and disabling of the chat auto-configurations are now configured via top level properties with the prefix `spring.ai.model.chat`.
+=== Authentication
 
-To enable, spring.ai.model.chat=anthropic (It is enabled by default)
+Configure your API key either programmatically or via environment variable:
 
-To disable, spring.ai.model.chat=none (or any value which doesn't match anthropic)
+[source,java]
+----
+var chatOptions = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .maxTokens(1024)
+    .apiKey(System.getenv("ANTHROPIC_API_KEY"))
+    .build();
 
-This change is done to allow configuration of multiple models.
-====
+var chatModel = new AnthropicChatModel(chatOptions);
+----
 
-The prefix `spring.ai.anthropic.chat` is the property prefix that lets you configure the chat model implementation for Anthropic.
+Or set the environment variable and let the SDK auto-detect it:
 
-[cols="3,5,1", stripes=even]
-|====
-| Property | Description | Default
+[source,bash]
+----
+export ANTHROPIC_API_KEY=<your-api-key>
+----
 
-| spring.ai.anthropic.chat.enabled (Removed and no longer valid) | Enable Anthropic chat model.  | true
-| spring.ai.model.chat | Enable Anthropic chat model.  | anthropic
-| spring.ai.anthropic.chat.options.model | This is the Anthropic Chat model to use. Supports: `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-haiku-4-5`, `claude-sonnet-4-5`, `claude-opus-4-5`, `claude-opus-4-1`, `claude-sonnet-4-0`, `claude-opus-4-0` | `claude-sonnet-4-6`
-| spring.ai.anthropic.chat.options.temperature | The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. | 0.8
-| spring.ai.anthropic.chat.options.max-tokens | The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. | 500
-| spring.ai.anthropic.chat.options.stop-sequence | Custom text sequences that will cause the model to stop generating. Our models will normally stop when they have naturally completed their turn, which will result in a response stop_reason of "end_turn". If you want the model to stop generating when it encounters custom strings of text, you can use the stop_sequences parameter. If the model encounters one of the custom sequences, the response stop_reason value will be "stop_sequence" and the response stop_sequence value will contain the matched stop sequence. | -
-| spring.ai.anthropic.chat.options.top-p | Use nucleus sampling. In nucleus sampling, we compute the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by top_p. You should either alter temperature or top_p, but not both. Recommended for advanced use cases only. You usually only need to use temperature. | -
-| spring.ai.anthropic.chat.options.top-k | Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Learn more technical details here. Recommended for advanced use cases only. You usually only need to use temperature. | -
-| spring.ai.anthropic.chat.options.tool-names | List of tools, identified by their names, to enable for tool calling in a single prompt requests. Tools with those names must exist in the toolCallbacks registry. | -
-| spring.ai.anthropic.chat.options.tool-callbacks | Tool Callbacks to register with the ChatModel. | -
-| spring.ai.anthropic.chat.options.toolChoice | Controls which (if any) tool is called by the model. `none` means the model will not call a function and instead generates a message. `auto` means the model can pick between generating a message or calling a tool. Specifying a particular tool via `{"type: "tool", "name": "my_tool"}` forces the model to call that tool. `none` is the default when no functions are present. `auto` is the default if functions are present. | -
-| spring.ai.anthropic.chat.options.internal-tool-execution-enabled | If false, the Spring AI will not handle the tool calls internally, but will proxy them to the client. Then it is the client's responsibility to handle the tool calls, dispatch them to the appropriate function, and return the results. If true (the default), the Spring AI will handle the function calls internally. Applicable only for chat models with function calling support | true
-| spring.ai.anthropic.chat.options.http-headers | Optional HTTP headers to be added to the chat completion request. | -
-|====
+[source,java]
+----
+// API key will be detected from ANTHROPIC_API_KEY environment variable
+var chatModel = new AnthropicChatModel(
+    AnthropicChatOptions.builder()
+        .model("claude-sonnet-4-20250514")
+        .maxTokens(1024)
+        .build());
+----
+
+=== Basic Usage
 
-TIP: For the latest list of model aliases and their descriptions, see the link:https://docs.anthropic.com/en/docs/about-claude/models/overview#model-aliases[official Anthropic model aliases documentation].
+[source,java]
+----
+ChatResponse response = chatModel.call(
+    new Prompt("Generate the names of 5 famous pirates."));
 
-TIP: All properties prefixed with `spring.ai.anthropic.chat.options` can be overridden at runtime by adding a request specific <<chat-options>> to the `Prompt` call.
+// Or with streaming responses
+Flux<ChatResponse> stream = chatModel.stream(
+    new Prompt("Generate the names of 5 famous pirates."));
+----
 
 == Runtime Options [[chat-options]]
 
-The https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions.java] provides model configurations, such as the model to use, the temperature, the max token count, etc.
+The https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions.java] class provides model configurations such as the model to use, temperature, max tokens, etc.
 
-On start-up, the default options can be configured with the `AnthropicChatModel(api, options)` constructor or the `spring.ai.anthropic.chat.options.*` properties.
+On start-up, configure default options with the `AnthropicChatModel(options)` constructor or the `spring.ai.anthropic.chat.options.*` properties.
 
-At run-time you can override the default options by adding new, request specific, options to the `Prompt` call.
-For example to override the default model and temperature for a specific request:
+At run-time, you can override the default options by adding new, request-specific options to the `Prompt` call.
+For example, to override the default model and temperature for a specific request:
 
 [source,java]
 ----
@@ -181,944 +138,361 @@ ChatResponse response = chatModel.call(
     new Prompt(
         "Generate the names of 5 famous pirates.",
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
+            .model("claude-sonnet-4-20250514")
             .temperature(0.4)
         .build()
     ));
 ----
 
-TIP: In addition to the model specific https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions] you can use a portable link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/ChatOptions.java[ChatOptions] instance, created with the link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/DefaultChatOptionsBuilder.java[ChatOptions#builder()].
-
-== Prompt Caching
-
-Anthropic's https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching[prompt caching feature] allows you to cache frequently used prompts to reduce costs and improve response times for repeated interactions.
-When you cache a prompt, subsequent identical requests can reuse the cached content, significantly reducing the number of input tokens processed.
-
-[NOTE]
-====
-*Supported Models*
-
-Prompt caching is currently supported on Claude Sonnet 4.5, Claude Opus 4.5, Claude Haiku 4.5, Claude Opus 4, Claude Sonnet 4, Claude Sonnet 3.7, Claude Sonnet 3.5, Claude Haiku 3.5, Claude Haiku 3, and Claude Opus 3.
+=== Chat Options
 
-*Token Requirements*
-
-Different models have different minimum token thresholds for cache effectiveness:
-- Claude Sonnet 4: 1024+ tokens
-- Claude Haiku models: 2048+ tokens
-- Other models: 1024+ tokens
-====
-
-=== Cache Strategies
+[cols="3,5,1", stripes=even]
+|====
+| Option | Description | Default
+
+| model | Name of the Claude model to use. Models include: `claude-sonnet-4-20250514`, `claude-opus-4-20250514`, `claude-3-5-sonnet-20241022`, `claude-3-5-haiku-20241022`, etc. See https://docs.anthropic.com/en/docs/about-claude/models[Claude Models]. | `claude-haiku-4-5`
+| maxTokens | The maximum number of tokens to generate in the response. | 4096
+| temperature | Controls randomness in the response. Higher values make output more random, lower values make it more deterministic. Range: 0.0-1.0 | 1.0
+| topP | Nucleus sampling parameter. The model considers tokens with top_p probability mass. | -
+| topK | Only sample from the top K options for each token. | -
+| stopSequences | Custom sequences that will cause the model to stop generating. | -
+| apiKey | The API key for authentication. Auto-detects from `ANTHROPIC_API_KEY` environment variable if not set. | -
+| baseUrl | The base URL for the Anthropic API. | https://api.anthropic.com
+| timeout | Request timeout duration. | 60 seconds
+| maxRetries | Maximum number of retry attempts for failed requests. | 2
+| proxy | Proxy settings for the HTTP client. | -
+| customHeaders | Custom HTTP headers to include on all requests (client-level). | -
+| httpHeaders | Per-request HTTP headers. These are added to individual API calls via `MessageCreateParams.putAdditionalHeader()`. Useful for request-level tracking, beta API headers, or routing. | -
+| thinking | Thinking configuration. Use the convenience builders `thinkingEnabled(budgetTokens)`, `thinkingAdaptive()`, or `thinkingDisabled()`, or pass a raw `ThinkingConfigParam`. | -
+| outputConfig | Output configuration for structured output (JSON schema) and effort control. Use `outputConfig(OutputConfig)` for full control, or the convenience methods `outputSchema(String)` and `effort(OutputConfig.Effort)`. Requires `claude-sonnet-4-6` or newer. | -
+|====
 
-Spring AI provides strategic cache placement through the `AnthropicCacheStrategy` enum.
-Each strategy automatically places cache breakpoints at optimal locations while staying within Anthropic's 4-breakpoint limit.
+=== Tool Calling Options
 
-[cols="2,3,5", stripes=even]
-|====
-| Strategy | Breakpoints Used | Use Case
-
-| `NONE`
-| 0
-| Disables prompt caching completely.
-Use when requests are one-off or content is too small to benefit from caching.
-
-| `SYSTEM_ONLY`
-| 1
-| Caches system message content.
-Tools are cached implicitly via Anthropic's automatic ~20-block lookback mechanism.
-Use when system prompts are large and stable with fewer than 20 tools.
-
-| `TOOLS_ONLY`
-| 1
-| Caches tool definitions only. System messages remain uncached and are processed fresh on each request.
-Use when tool definitions are large and stable (5000+ tokens) but system prompts change frequently or vary per tenant/context.
-
-| `SYSTEM_AND_TOOLS`
-| 2
-| Caches both tool definitions (breakpoint 1) and system message (breakpoint 2) explicitly.
-Use when you have 20+ tools (beyond automatic lookback) or want deterministic caching of both components.
-System changes don't invalidate tool cache.
-
-| `CONVERSATION_HISTORY`
-| 1-4
-| Caches entire conversation history up to the current user question.
-Use for multi-turn conversations with chat memory where conversation history grows over time.
+[cols="3,5,1", stripes=even]
 |====
+| Option | Description | Default
 
-IMPORTANT: Due to Anthropic's cascade invalidation, changing tool definitions will invalidate ALL downstream cache breakpoints (system, messages).
-Tool stability is critical when using `SYSTEM_AND_TOOLS` or `CONVERSATION_HISTORY` strategies.
+| toolChoice | Controls which tool (if any) is called by the model. Use `ToolChoiceAuto`, `ToolChoiceAny`, `ToolChoiceTool`, or `ToolChoiceNone`. | AUTO
+| toolCallbacks | List of tool callbacks to register with the model. | -
+| toolNames | Set of tool names to be resolved at runtime. | -
+| internalToolExecutionEnabled | If false, tool calls are proxied to the client for manual handling. If true, Spring AI handles tool calls internally. | true
+| disableParallelToolUse | When true, the model will use at most one tool per response. | false
+|====
 
-=== Enabling Prompt Caching
+TIP: In addition to the model-specific https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions], you can use a portable link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/ChatOptions.java[ChatOptions] instance, created with link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/DefaultChatOptionsBuilder.java[ChatOptions#builder()].
 
-Enable prompt caching by setting `cacheOptions` on `AnthropicChatOptions` and choosing a `strategy`.
+== Tool Calling
 
-==== System-Only Caching
+You can register custom Java functions or methods with the `AnthropicChatModel` and have Claude intelligently choose to output a JSON object containing arguments to call one or many of the registered functions/tools.
+This is a powerful technique to connect the LLM capabilities with external tools and APIs.
+Read more about xref:api/tools.adoc[Tool Calling].
 
-Best for: Stable system prompts with <20 tools (tools cached implicitly via automatic lookback).
+=== Basic Tool Calling
 
 [source,java]
 ----
-// Cache system message content (tools cached implicitly)
-ChatResponse response = chatModel.call(
-    new Prompt(
-        List.of(
-            new SystemMessage("You are a helpful AI assistant with extensive knowledge..."),
-            new UserMessage("What is machine learning?")
-        ),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                .build())
-            .maxTokens(500)
-            .build()
-    )
-);
-----
-
-==== Tools-Only Caching
+var chatOptions = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .toolCallbacks(List.of(
+        FunctionToolCallback.builder("getCurrentWeather", new WeatherService())
+            .description("Get the weather in location")
+            .inputType(WeatherService.Request.class)
+            .build()))
+    .build();
 
-Best for: Large stable tool sets with dynamic system prompts (multi-tenant apps, A/B testing).
+var chatModel = new AnthropicChatModel(chatOptions);
 
-[source,java]
-----
-// Cache tool definitions, system prompt processed fresh each time
 ChatResponse response = chatModel.call(
-    new Prompt(
-        List.of(
-            new SystemMessage("You are a " + persona + " assistant..."), // Dynamic per-tenant
-            new UserMessage("What's the weather like in San Francisco?")
-        ),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.TOOLS_ONLY)
-                .build())
-            .toolCallbacks(weatherToolCallback) // Large tool set cached
-            .maxTokens(500)
-            .build()
-    )
-);
+    new Prompt("What's the weather like in San Francisco?", chatOptions));
 ----
 
-==== System and Tools Caching
+=== Tool Choice Options
 
-Best for: 20+ tools (beyond automatic lookback) or when both components should be cached independently.
+Control how Claude uses tools with the `toolChoice` option:
 
 [source,java]
 ----
-// Cache both tool definitions and system message with independent breakpoints
-// Changing system won't invalidate tool cache (but changing tools invalidates both)
-ChatResponse response = chatModel.call(
-    new Prompt(
-        List.of(
-            new SystemMessage("You are a weather analysis assistant..."),
-            new UserMessage("What's the weather like in San Francisco?")
-        ),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
-                .build())
-            .toolCallbacks(weatherToolCallback) // 20+ tools
-            .maxTokens(500)
-            .build()
-    )
-);
-----
+import com.anthropic.models.messages.ToolChoiceAny;
+import com.anthropic.models.messages.ToolChoiceTool;
+import com.anthropic.models.messages.ToolChoiceNone;
 
-==== Conversation History Caching
-
-[source,java]
-----
-// Cache conversation history with ChatClient and memory (cache breakpoint on last user message)
-ChatClient chatClient = ChatClient.builder(chatModel)
-    .defaultSystem("You are a personalized career counselor...")
-    .defaultAdvisors(MessageChatMemoryAdvisor.builder(chatMemory)
-        .conversationId(conversationId)
-        .build())
+// Force Claude to use any available tool
+var options = AnthropicChatOptions.builder()
+    .toolChoice(ToolChoiceAny.builder().build())
+    .toolCallbacks(...)
     .build();
 
-String response = chatClient.prompt()
-    .user("What career advice would you give me?")
-    .options(AnthropicChatOptions.builder()
-        .model("claude-sonnet-4-6")
-        .cacheOptions(AnthropicCacheOptions.builder()
-            .strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
-            .build())
-        .maxTokens(500)
-        .build())
-    .call()
-    .content();
-----
-
-==== Using ChatClient Fluent API
+// Force Claude to use a specific tool
+var options = AnthropicChatOptions.builder()
+    .toolChoice(ToolChoiceTool.builder().name("getCurrentWeather").build())
+    .toolCallbacks(...)
+    .build();
 
-[source,java]
-----
-String response = ChatClient.create(chatModel)
-    .prompt()
-    .system("You are an expert document analyst...")
-    .user("Analyze this large document: " + document)
-    .options(AnthropicChatOptions.builder()
-        .model("claude-sonnet-4-6")
-        .cacheOptions(AnthropicCacheOptions.builder()
-            .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-            .build())
-        .build())
-    .call()
-    .content();
+// Prevent tool use entirely
+var options = AnthropicChatOptions.builder()
+    .toolChoice(ToolChoiceNone.builder().build())
+    .toolCallbacks(...)
+    .build();
 ----
 
-=== Advanced Caching Options
-
-==== Per-Message TTL (5m or 1h)
-
-By default, cached content uses a 5-minute TTL.
-You can set a 1-hour TTL for specific message types.
-When 1-hour TTL is used, Spring AI automatically sets the required Anthropic beta header.
+[TIP]
+====
+The Anthropic Java SDK provides convenient static factory methods for common tool choices, which can make your code more concise:
 
-[source,java]
-----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        List.of(new SystemMessage(largeSystemPrompt)),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
-                .build())
-            .maxTokens(500)
-            .build()
-    )
-);
-----
+* `ToolChoice.auto()` can be used instead of `ToolChoice.ofAuto(...)`.
+* `ToolChoice.any()` can be used instead of `ToolChoice.ofAny(...)`.
+* `ToolChoice.none()` can be used instead of `ToolChoice.ofNone(...)`.
+====
 
-NOTE: Extended TTL uses Anthropic beta feature `extended-cache-ttl-2025-04-11`.
+=== Streaming Tool Calling
 
-==== Cache Eligibility Filters
+The Anthropic SDK module fully supports tool calling in streaming mode. When Claude decides to call a tool during streaming:
 
-Control when cache breakpoints are used by setting minimum content lengths and an optional token-based length function:
+1. Tool call arguments are accumulated from partial JSON deltas
+2. Tools are executed when the content block completes
+3. Results are sent back to Claude
+4. The conversation continues recursively until Claude provides a final response
 
 [source,java]
 ----
-AnthropicCacheOptions cache = AnthropicCacheOptions.builder()
-    .strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
-    .messageTypeMinContentLength(MessageType.SYSTEM, 1024)
-    .messageTypeMinContentLength(MessageType.USER, 1024)
-    .messageTypeMinContentLength(MessageType.ASSISTANT, 1024)
-    .contentLengthFunction(text -> MyTokenCounter.count(text))
-    .build();
+Flux<ChatResponse> stream = chatModel.stream(
+    new Prompt("What's the weather in Paris, Tokyo, and New York?", chatOptions));
 
-ChatResponse response = chatModel.call(
-    new Prompt(
-        List.of(/* messages */),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(cache)
-            .build()
-    )
-);
+String response = stream
+    .collectList()
+    .block()
+    .stream()
+    .map(r -> r.getResult().getOutput().getText())
+    .filter(Objects::nonNull)
+    .collect(Collectors.joining());
 ----
 
-NOTE: Tool Definitions are always considered for caching if `SYSTEM_AND_TOOLS` strategy is used, regardless of content length.
+== Streaming
 
-=== Usage Example
-
-Here's a complete example demonstrating prompt caching with cost tracking:
+The Anthropic SDK module supports both synchronous and streaming responses. Streaming allows Claude to return responses incrementally as they're generated.
 
 [source,java]
 ----
-// Create system content that will be reused multiple times
-String largeSystemPrompt = "You are an expert software architect specializing in distributed systems...";
-
-// First request - creates cache
-ChatResponse firstResponse = chatModel.call(
-    new Prompt(
-        List.of(
-            new SystemMessage(largeSystemPrompt),
-            new UserMessage("What is microservices architecture?")
-        ),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                .build())
-            .maxTokens(500)
-            .build()
-    )
-);
-
-// Access cache-related token usage
-AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata()
-    .getUsage().getNativeUsage();
+Flux<ChatResponse> stream = chatModel.stream(new Prompt("Tell me a story"));
 
-System.out.println("Cache creation tokens: " + firstUsage.cacheCreationInputTokens());
-System.out.println("Cache read tokens: " + firstUsage.cacheReadInputTokens());
+stream.subscribe(response -> {
+    String content = response.getResult().getOutput().getText();
+    if (content != null) {
+        System.out.print(content);
+    }
+});
+----
 
-// Second request with same system prompt - reads from cache  
-ChatResponse secondResponse = chatModel.call(
-    new Prompt(
-        List.of(
-            new SystemMessage(largeSystemPrompt),
-            new UserMessage("What are the benefits of event sourcing?")
-        ),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                .build())
-            .maxTokens(500)
-            .build()
-    )
-);
+== Extended Thinking
 
-AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata()
-    .getUsage().getNativeUsage();
+Anthropic Claude models support a "thinking" feature that allows the model to show its reasoning process before providing a final answer. This is especially useful for complex questions that require step-by-step reasoning, such as math, logic, and analysis tasks.
 
-System.out.println("Cache creation tokens: " + secondUsage.cacheCreationInputTokens()); // Should be 0
-System.out.println("Cache read tokens: " + secondUsage.cacheReadInputTokens()); // Should be > 0
-----
+[NOTE]
+====
+*Supported Models*
 
-=== Token Usage Tracking
+The thinking feature is supported by the following Claude models:
 
-The `Usage` record provides detailed information about cache-related token consumption.
-To access Anthropic-specific cache metrics, use the `getNativeUsage()` method:
+* Claude 4 models (`claude-opus-4-20250514`, `claude-sonnet-4-20250514`)
+* Claude 3.7 Sonnet (`claude-3-7-sonnet-20250219`)
 
-[source,java]
-----
-AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata()
-    .getUsage().getNativeUsage();
-----
+*Model capabilities:*
 
-Cache-specific metrics include:
+* *Claude 3.7 Sonnet*: Returns full thinking output.
+* *Claude 4 models*: Support summarized thinking and enhanced tool integration.
 
-* `cacheCreationInputTokens()`: Returns the number of tokens used when creating a cache entry
-* `cacheReadInputTokens()`: Returns the number of tokens read from an existing cache entry
+API request structure is the same across all supported models, but output behavior varies.
+====
 
-When you first send a cached prompt:
-- `cacheCreationInputTokens()` will be greater than 0
-- `cacheReadInputTokens()` will be 0
+=== Thinking Configuration
 
-When you send the same cached prompt again:
-- `cacheCreationInputTokens()` will be 0
-- `cacheReadInputTokens()` will be greater than 0
+To enable thinking, configure the following:
 
-=== Real-World Use Cases
+1. **Set a thinking budget**: `budgetTokens` must be at least 1024 and less than `maxTokens`.
+2. **Set temperature to 1.0**: Required when thinking is enabled.
 
-==== Legal Document Analysis
+=== Convenience Builder Methods
 
-Analyze large legal contracts or compliance documents efficiently by caching document content across multiple questions:
+`AnthropicChatOptions.Builder` provides convenience methods for the three thinking modes:
 
 [source,java]
 ----
-// Load a legal contract (PDF or text)
-String legalContract = loadDocument("merger-agreement.pdf"); // ~3000 tokens
-
-// System prompt with legal expertise
-String legalSystemPrompt = "You are an expert legal analyst specializing in corporate law. " +
-    "Analyze the following contract and provide precise answers about terms, obligations, and risks: " +
-    legalContract;
+// Enable thinking with a specific token budget
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .temperature(1.0)
+    .maxTokens(16000)
+    .thinkingEnabled(10000L)    // budget must be >= 1024 and < maxTokens
+    .build();
 
-// First analysis - creates cache
-ChatResponse riskAnalysis = chatModel.call(
-    new Prompt(
-        List.of(
-            new SystemMessage(legalSystemPrompt),
-            new UserMessage("What are the key termination clauses and associated penalties?")
-        ),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                .build())
-            .maxTokens(1000)
-            .build()
-    )
-);
+// Let Claude adaptively decide whether to think (requires a model that supports adaptive thinking)
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .thinkingAdaptive()
+    .build();
 
-// Subsequent questions reuse cached document - 90% cost savings
-ChatResponse obligationAnalysis = chatModel.call(
-    new Prompt(
-        List.of(
-            new SystemMessage(legalSystemPrompt), // Same content - cache hit
-            new UserMessage("List all financial obligations and payment schedules.")
-        ),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                .build())
-            .maxTokens(1000)
-            .build()
-    )
-);
+// Explicitly disable thinking
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .thinkingDisabled()
+    .build();
 ----
 
-==== Batch Code Review
-
-Process multiple code files with consistent review criteria while caching the review guidelines:
+You can also use the raw SDK `ThinkingConfigParam` directly:
 
 [source,java]
 ----
-// Define comprehensive code review guidelines
-String reviewGuidelines = """
-    You are a senior software engineer conducting code reviews. Apply these criteria:
-    - Security vulnerabilities and best practices
-    - Performance optimizations and memory usage
-    - Code maintainability and readability
-    - Testing coverage and edge cases
-    - Design patterns and architecture compliance
-    """;
-
-List<String> codeFiles = Arrays.asList(
-    "UserService.java", "PaymentController.java", "SecurityConfig.java"
-);
+import com.anthropic.models.messages.ThinkingConfigParam;
+import com.anthropic.models.messages.ThinkingConfigEnabled;
 
-List<String> reviews = new ArrayList<>();
-
-for (String filename : codeFiles) {
-    String sourceCode = loadSourceFile(filename);
-    
-    ChatResponse review = chatModel.call(
-        new Prompt(
-            List.of(
-                new SystemMessage(reviewGuidelines), // Cached across all reviews
-                new UserMessage("Review this " + filename + " code:\n\n" + sourceCode)
-            ),
-            AnthropicChatOptions.builder()
-                .model("claude-sonnet-4-6")
-                .cacheOptions(AnthropicCacheOptions.builder()
-                    .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                    .build())
-                .maxTokens(800)
-                .build()
-        )
-    );
-    
-    reviews.add(review.getResult().getOutput().getText());
-}
-
-// Guidelines cached after first request, subsequent reviews are faster and cheaper
+var options = AnthropicChatOptions.builder()
+    .thinking(ThinkingConfigParam.ofEnabled(
+        ThinkingConfigEnabled.builder().budgetTokens(10000L).build()))
+    .build();
 ----
 
-==== Multi-Tenant SaaS with Shared Tools
-
-Build a multi-tenant application where tools are shared but system prompts are customized per tenant:
+=== Non-streaming Example
 
 [source,java]
 ----
-// Define large shared tool set (used by all tenants)
-List<FunctionCallback> sharedTools = Arrays.asList(
-    weatherToolCallback,    // ~500 tokens
-    calendarToolCallback,   // ~800 tokens
-    emailToolCallback,      // ~700 tokens
-    analyticsToolCallback,  // ~600 tokens
-    reportingToolCallback,  // ~900 tokens
-    // ... 20+ more tools, totaling 5000+ tokens
-);
-
-@Service
-public class MultiTenantAIService {
-
-    public String handleTenantRequest(String tenantId, String userQuery) {
-        // Get tenant-specific configuration
-        TenantConfig config = tenantRepository.findById(tenantId);
-
-        // Dynamic system prompt per tenant
-        String tenantSystemPrompt = String.format("""
-            You are %s's AI assistant. Company values: %s.
-            Brand voice: %s. Compliance requirements: %s.
-            """, config.companyName(), config.values(),
-                 config.brandVoice(), config.compliance());
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .temperature(1.0)
+    .maxTokens(16000)
+    .thinkingEnabled(10000L)
+    .build();
 
-        ChatResponse response = chatModel.call(
-            new Prompt(
-                List.of(
-                    new SystemMessage(tenantSystemPrompt), // Different per tenant, NOT cached
-                    new UserMessage(userQuery)
-                ),
-                AnthropicChatOptions.builder()
-                    .model("claude-sonnet-4-6")
-                    .cacheOptions(AnthropicCacheOptions.builder()
-                        .strategy(AnthropicCacheStrategy.TOOLS_ONLY) // Cache tools only
-                        .build())
-                    .toolCallbacks(sharedTools) // Cached once, shared across all tenants
-                    .maxTokens(800)
-                    .build()
-            )
-        );
+ChatResponse response = chatModel.call(
+    new Prompt("Are there an infinite number of prime numbers such that n mod 4 == 3?", options));
 
-        return response.getResult().getOutput().getText();
+// The response contains multiple generations:
+// - ThinkingBlock generations (with "signature" in metadata)
+// - TextBlock generations (with the final answer)
+for (Generation generation : response.getResults()) {
+    AssistantMessage message = generation.getOutput();
+    if (message.getMetadata().containsKey("signature")) {
+        // This is a thinking block - contains Claude's reasoning
+        System.out.println("Thinking: " + message.getText());
+        System.out.println("Signature: " + message.getMetadata().get("signature"));
+    }
+    else if (message.getMetadata().containsKey("data")) {
+        // This is a redacted thinking block (safety-redacted reasoning)
+        System.out.println("Redacted thinking data: " + message.getMetadata().get("data"));
+    }
+    else if (message.getText() != null && !message.getText().isBlank()) {
+        // This is the final text response
+        System.out.println("Answer: " + message.getText());
     }
 }
-
-// Tools cached once (5000 tokens @ 10% = 500 token cost for cache hits)
-// Each tenant's unique system prompt processed fresh (200-500 tokens @ 100%)
-// Total per request: ~700-1000 tokens vs 5500+ without TOOLS_ONLY
 ----
 
-==== Customer Support with Knowledge Base
+=== Streaming Example
 
-Create a customer support system that caches your product knowledge base for consistent, accurate responses:
+Thinking is fully supported in streaming mode. Thinking deltas and signature deltas are emitted as they arrive:
 
 [source,java]
 ----
-// Load comprehensive product knowledge
-String knowledgeBase = """
-    PRODUCT DOCUMENTATION:
-    - API endpoints and authentication methods
-    - Common troubleshooting procedures
-    - Billing and subscription details
-    - Integration guides and examples
-    - Known issues and workarounds
-    """ + loadProductDocs(); // ~2500 tokens
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .temperature(1.0)
+    .maxTokens(16000)
+    .thinkingEnabled(10000L)
+    .build();
 
-@Service
-public class CustomerSupportService {
+Flux<ChatResponse> stream = chatModel.stream(
+    new Prompt("Are there an infinite number of prime numbers such that n mod 4 == 3?", options));
 
-    public String handleCustomerQuery(String customerQuery, String customerId) {
-        ChatResponse response = chatModel.call(
-            new Prompt(
-                List.of(
-                    new SystemMessage("You are a helpful customer support agent. " +
-                        "Use this knowledge base to provide accurate solutions: " + knowledgeBase),
-                    new UserMessage("Customer " + customerId + " asks: " + customerQuery)
-                ),
-                AnthropicChatOptions.builder()
-                    .model("claude-sonnet-4-6")
-                    .cacheOptions(AnthropicCacheOptions.builder()
-                        .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                        .build())
-                    .maxTokens(600)
-                    .build()
-            )
-        );
+stream.subscribe(response -> {
+    Generation generation = response.getResult();
+    AssistantMessage message = generation.getOutput();
 
-        return response.getResult().getOutput().getText();
+    if (message.getMetadata().containsKey("thinking")) {
+        // Incremental thinking content
+        System.out.print(message.getText());
     }
-}
-
-// Knowledge base is cached across all customer queries
-// Multiple support agents can benefit from the same cached content
+    else if (message.getMetadata().containsKey("signature")) {
+        // Thinking block signature (emitted at end of thinking)
+        System.out.println("\nSignature: " + message.getMetadata().get("signature"));
+    }
+    else if (message.getText() != null) {
+        // Final text content
+        System.out.print(message.getText());
+    }
+});
 ----
 
-=== Best Practices
+=== Response Structure
 
-1. **Choose the Right Strategy**:
-   - Use `SYSTEM_ONLY` for stable system prompts with <20 tools (tools cached implicitly via automatic lookback)
-   - Use `TOOLS_ONLY` for large stable tool sets (5000+ tokens) with dynamic system prompts (multi-tenant, A/B testing)
-   - Use `SYSTEM_AND_TOOLS` when you have 20+ tools (beyond automatic lookback) or want both cached independently
-   - Use `CONVERSATION_HISTORY` with ChatClient memory for multi-turn conversations
-   - Use `NONE` to explicitly disable caching
+When thinking is enabled, the response contains different types of content:
 
-2. **Understand Cascade Invalidation**: Anthropic's cache hierarchy (`tools → system → messages`) means changes flow downward:
-   - Changing **tools** invalidates: tools + system + messages (all caches) ❌❌❌
-   - Changing **system** invalidates: system + messages (tools cache remains valid) ✅❌❌
-   - Changing **messages** invalidates: messages only (tools and system caches remain valid) ✅✅❌
+[cols="2,3,3", stripes=even]
+|====
+| Content Type | Metadata Key | Description
 
-   **Tool stability is critical** when using `SYSTEM_AND_TOOLS` or `CONVERSATION_HISTORY` strategies.
+| **Thinking Block** | `signature` | Claude's reasoning text with a cryptographic signature. In sync mode, the thinking text is in `getText()` and the signature is in `getMetadata().get("signature")`.
+| **Redacted Thinking** | `data` | Safety-redacted reasoning. Contains only a `data` marker, no visible text.
+| **Signature (streaming)** | `signature` | In streaming mode, the signature arrives as a separate delta at the end of a thinking block.
+| **Thinking Delta (streaming)** | `thinking` | Incremental thinking text chunks during streaming. The `thinking` metadata key is set to `true`.
+| **Text Block** | _(none)_ | The final answer text in `getText()`.
+|====
 
-3. **SYSTEM_AND_TOOLS Independence**: With `SYSTEM_AND_TOOLS`, changing the system message does NOT invalidate the tool cache, allowing efficient reuse of cached tools even when system prompts vary.
+== Multi-Modal Support
 
-4. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for Sonnet 4, 2048+ for Haiku models).
+The Anthropic SDK module supports multi-modal inputs, allowing you to send images and PDF documents alongside text in your prompts.
 
-5. **Reuse Identical Content**: Caching works best with exact matches of prompt content. Even small changes will require a new cache entry.
+=== Image Input
 
-6. **Monitor Token Usage**: Use the cache usage statistics to track cache effectiveness:
-   ```java
-   AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage();
-   if (usage != null) {
-       System.out.println("Cache creation: " + usage.cacheCreationInputTokens());
-       System.out.println("Cache read: " + usage.cacheReadInputTokens());
-   }
-   ```
+Send images to Claude for analysis using the `Media` class:
 
-7. **Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit.
+[source,java]
+----
+var imageResource = new ClassPathResource("/test-image.png");
 
-8. **Cache Lifetime**: Default TTL is 5 minutes; set 1-hour TTL per message type via `messageTypeTtl(...)`. Each cache access resets the timer.
+var userMessage = UserMessage.builder()
+    .text("What do you see in this image?")
+    .media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageResource)))
+    .build();
 
-9. **Tool Caching Limitations**: Be aware that tool-based interactions may not provide cache usage metadata in the response.
+ChatResponse response = chatModel.call(new Prompt(List.of(userMessage)));
+----
 
-=== Implementation Details
+Supported image formats: PNG, JPEG, GIF, WebP. Images can be provided as:
 
-The prompt caching implementation in Spring AI follows these key design principles:
+* Byte arrays (automatically base64-encoded)
+* HTTPS URLs (passed directly to the API)
 
-1. **Strategic Cache Placement**: Cache breakpoints are automatically placed at optimal locations based on the chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit.
-   - `CONVERSATION_HISTORY` places cache breakpoints on: tools (if present), system message, and the last user message
-   - This enables Anthropic's prefix matching to incrementally cache the growing conversation history
-   - Each turn builds on the previous cached prefix, maximizing cache reuse
+=== PDF Document Input
 
-2. **Provider Portability**: Cache configuration is done through `AnthropicChatOptions` rather than individual messages, preserving compatibility when switching between different AI providers.
+Send PDF documents for Claude to analyze:
 
-3. **Thread Safety**: The cache breakpoint tracking is implemented with thread-safe mechanisms to handle concurrent requests correctly.
+[source,java]
+----
+var pdfResource = new ClassPathResource("/document.pdf");
 
-4. **Automatic Content Ordering**: The implementation ensures proper on-the-wire ordering of JSON content blocks and cache controls according to Anthropic's API requirements.
+var userMessage = UserMessage.builder()
+    .text("Please summarize this document.")
+    .media(List.of(new Media(new MimeType("application", "pdf"), pdfResource)))
+    .build();
 
-5. **Aggregate Eligibility Checking**: For `CONVERSATION_HISTORY`, the implementation considers all message types (user, assistant, tool) within the last ~20 content blocks when determining if the combined content meets the minimum token threshold for caching.
+ChatResponse response = chatModel.call(new Prompt(List.of(userMessage)));
+----
 
-=== Future Enhancements
+=== Multiple Media Items
 
-The current cache strategies are designed to handle **90% of common use cases** effectively. For applications requiring more granular control, future enhancements may include:
+You can include multiple images or documents in a single message:
 
-- **Message-level cache control** for fine-grained breakpoint placement
-- **Multi-block content caching** within individual messages  
-- **Advanced cache boundary selection** for complex tool scenarios
-- **Mixed TTL strategies** for optimized cache hierarchies
+[source,java]
+----
+var userMessage = UserMessage.builder()
+    .text("Compare these two images.")
+    .media(List.of(
+        new Media(MimeTypeUtils.IMAGE_PNG, image1Resource),
+        new Media(MimeTypeUtils.IMAGE_PNG, image2Resource)))
+    .build();
+----
 
-These enhancements will maintain full backward compatibility while unlocking Anthropic's complete prompt caching capabilities for specialized use cases.
-
-== Thinking
-
-Anthropic Claude models support a "thinking" feature that allows the model to show its reasoning process before providing a final answer. This feature enables more transparent and detailed problem-solving, particularly for complex questions that require step-by-step reasoning.
-
-[NOTE]
-====
-*Supported Models*
-
-The thinking feature is supported by the following Claude models:
-
-* Claude 4.6 models (`claude-opus-4-6`, `claude-sonnet-4-6`)
-* Claude 4.5 models (`claude-opus-4-5`, `claude-sonnet-4-5`)
-* Claude Haiku 4.5 (`claude-haiku-4-5`)
-
-*Model capabilities:*
-
-* *Claude 4.6 models*: Support summarized thinking, interleaved thinking, adaptive thinking, and enhanced tool integration.
-* *Claude 4.5 models*: Support summarized thinking, interleaved thinking, and enhanced tool integration.
-* *Claude Haiku 4.5*: Supports extended thinking but not adaptive thinking.
-
-API request structure is the same across all supported models, but output behavior varies.
-====
-
-=== Thinking Configuration
-
-To enable thinking on any supported Claude model, include the following configuration in your request:
-
-==== Required Configuration
-
-1. **Add the `thinking` object**:
-- `"type": "enabled"`
-- `budget_tokens`: Token limit for reasoning (recommend starting at 1024)
-
-2. **Token budget rules**:
-- `budget_tokens` must typically be less than `max_tokens`
-- Claude may use fewer tokens than allocated
-- Larger budgets increase depth of reasoning but may impact latency
-- When using tool use with interleaved thinking (Claude 4 only), this constraint is relaxed, but not yet supported in Spring AI.
-
-==== Key Considerations
-
-* **All current Claude models** return a *summarized* version of the model's internal reasoning to reduce latency and protect sensitive content
-* **Thinking tokens are billable** as part of output tokens (even if not all are visible in response)
-* **Interleaved Thinking** is only available on Claude 4.5+ models and requires the beta header `interleaved-thinking-2025-05-14`
-* **Adaptive Thinking** is available on Claude 4.6 models (Opus and Sonnet)
-
-==== Tool Integration and Interleaved Thinking
-
-Claude 4 models support interleaved thinking with tool use, allowing the model to reason between tool calls.
-
-[NOTE]
-====
-The current Spring AI implementation supports basic thinking and tool use separately, but does not yet support interleaved thinking with tool use (where thinking continues across multiple tool calls).
-====
-
-For details on interleaved thinking with tool use, see the https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#extended-thinking-with-tool-use[Anthropic documentation].
-
-=== Non-streaming Example
-
-Here's how to enable thinking in a non-streaming request using the ChatClient API:
-
-[source,java]
-----
-ChatClient chatClient = ChatClient.create(chatModel);
-
-// Enable thinking with explicit configuration
-ChatResponse response = chatClient.prompt()
-    .options(AnthropicChatOptions.builder()
-        .model("claude-sonnet-4-6")
-        .temperature(1.0)  // Temperature should be set to 1 when thinking is enabled
-        .maxTokens(8192)
-        .build())
-    .user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
-    .call()
-    .chatResponse();
-
-// Use a more capable model for complex reasoning
-ChatResponse response4 = chatClient.prompt()
-    .options(AnthropicChatOptions.builder()
-        .model("claude-opus-4-6")
-        .maxTokens(8192)
-        // No explicit thinking configuration needed
-        .build())
-    .user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
-    .call()
-    .chatResponse();
-
-// Process the response which may contain thinking content
-for (Generation generation : response.getResults()) {
-    AssistantMessage message = generation.getOutput();
-    if (message.getText() != null) {
-        // Regular text response
-        System.out.println("Text response: " + message.getText());
-    }
-    else if (message.getMetadata().containsKey("signature")) {
-        // Thinking content
-        System.out.println("Thinking: " + message.getMetadata().get("thinking"));
-        System.out.println("Signature: " + message.getMetadata().get("signature"));
-    }
-}
-----
-
-=== Streaming Example
-
-You can also use thinking with streaming responses:
-
-[source,java]
-----
-ChatClient chatClient = ChatClient.create(chatModel);
-
-// For Claude 3.7 Sonnet - explicit thinking configuration
-Flux<ChatResponse> responseFlux = chatClient.prompt()
-    .options(AnthropicChatOptions.builder()
-        .model("claude-sonnet-4-6")
-        .temperature(1.0)
-        .maxTokens(8192)
-        .thinking(AnthropicApi.ThinkingType.ENABLED, 2048)
-        .build())
-    .user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
-    .stream();
-
-// For Claude 4 models - thinking is enabled by default
-Flux<ChatResponse> responseFlux4 = chatClient.prompt()
-    .options(AnthropicChatOptions.builder()
-        .model("claude-opus-4-0")
-        .maxTokens(8192)
-        // No explicit thinking configuration needed
-        .build())
-    .user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
-    .stream();
-
-// For streaming, you might want to collect just the text responses
-String textContent = responseFlux.collectList()
-    .block()
-    .stream()
-    .map(ChatResponse::getResults)
-    .flatMap(List::stream)
-    .map(Generation::getOutput)
-    .map(AssistantMessage::getText)
-    .filter(text -> text != null && !text.isBlank())
-    .collect(Collectors.joining());
-----
-
-=== Tool Use Integration
-
-Claude 4 models integrate thinking and tool use capabilities:
-
-* *Claude 3.7 Sonnet*: Supports both thinking and tool use, but they operate separately and require more explicit configuration
-* *Claude 4 models*: Natively interleave thinking and tool use, providing deeper reasoning during tool interactions
-
-=== Benefits of Using Thinking
-
-The thinking feature provides several benefits:
-
-1. **Transparency**: See the model's reasoning process and how it arrived at its conclusion
-2. **Debugging**: Identify where the model might be making logical errors
-3. **Education**: Use the step-by-step reasoning as a teaching tool
-4. **Complex Problem Solving**: Better results on math, logic, and reasoning tasks
-
-Note that enabling thinking requires a higher token budget, as the thinking process itself consumes tokens from your allocation.
-
-== Tool/Function Calling
-
-You can register custom Java Tools with the `AnthropicChatModel` and have the Anthropic Claude model intelligently choose to output a JSON object containing arguments to call one or many of the registered functions.
-This is a powerful technique to connect the LLM capabilities with external tools and APIs.
-Read more about xref:api/tools.adoc[Tool Calling].
-
-=== Tool Choice
-
-The `tool_choice` parameter allows you to control how the model uses the provided tools. This feature gives you fine-grained control over tool execution behavior.
-
-For complete API details, see the https://docs.anthropic.com/en/api/messages#body-tool-choice[Anthropic tool_choice documentation].
-
-==== Tool Choice Options
-
-Spring AI provides four tool choice strategies through the `AnthropicApi.ToolChoice` interface:
-
-* **`ToolChoiceAuto`** (default): The model automatically decides whether to use tools or respond with text
-* **`ToolChoiceAny`**: The model must use at least one of the available tools
-* **`ToolChoiceTool`**: The model must use a specific tool by name
-* **`ToolChoiceNone`**: The model cannot use any tools
-
-==== Disabling Parallel Tool Use
-
-All tool choice options (except `ToolChoiceNone`) support a `disableParallelToolUse` parameter. When set to `true`, the model will output at most one tool use.
-
-==== Usage Examples
-
-===== Auto Mode (Default Behavior)
-
-Let the model decide whether to use tools:
-
-[source,java]
-----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "What's the weather in San Francisco?",
-        AnthropicChatOptions.builder()
-            .toolChoice(new AnthropicApi.ToolChoiceAuto())
-            .toolCallbacks(weatherToolCallback)
-            .build()
-    )
-);
-----
-
-===== Force Tool Use (Any)
-
-Require the model to use at least one tool:
-
-[source,java]
-----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "What's the weather?",
-        AnthropicChatOptions.builder()
-            .toolChoice(new AnthropicApi.ToolChoiceAny())
-            .toolCallbacks(weatherToolCallback, calculatorToolCallback)
-            .build()
-    )
-);
-----
-
-===== Force Specific Tool
-
-Require the model to use a specific tool by name:
-
-[source,java]
-----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "What's the weather in San Francisco?",
-        AnthropicChatOptions.builder()
-            .toolChoice(new AnthropicApi.ToolChoiceTool("get_weather"))
-            .toolCallbacks(weatherToolCallback, calculatorToolCallback)
-            .build()
-    )
-);
-----
-
-===== Disable Tool Use
-
-Prevent the model from using any tools:
-
-[source,java]
-----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "What's the weather in San Francisco?",
-        AnthropicChatOptions.builder()
-            .toolChoice(new AnthropicApi.ToolChoiceNone())
-            .toolCallbacks(weatherToolCallback)
-            .build()
-    )
-);
-----
-
-===== Disable Parallel Tool Use
-
-Force the model to use only one tool at a time:
-
-[source,java]
-----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "What's the weather in San Francisco and what's 2+2?",
-        AnthropicChatOptions.builder()
-            .toolChoice(new AnthropicApi.ToolChoiceAuto(true)) // disableParallelToolUse = true
-            .toolCallbacks(weatherToolCallback, calculatorToolCallback)
-            .build()
-    )
-);
-----
-
-==== Using ChatClient API
-
-You can also use tool choice with the fluent ChatClient API:
-
-[source,java]
-----
-String response = ChatClient.create(chatModel)
-    .prompt()
-    .user("What's the weather in San Francisco?")
-    .options(AnthropicChatOptions.builder()
-        .toolChoice(new AnthropicApi.ToolChoiceTool("get_weather"))
-        .build())
-    .call()
-    .content();
-----
-
-==== Use Cases
-
-* **Validation**: Use `ToolChoiceTool` to ensure a specific tool is called for critical operations
-* **Efficiency**: Use `ToolChoiceAny` when you know a tool must be used to avoid unnecessary text generation
-* **Control**: Use `ToolChoiceNone` to temporarily disable tool access while keeping tool definitions registered
-* **Sequential Processing**: Use `disableParallelToolUse` to force sequential tool execution for dependent operations
-
-== Multimodal
-
-Multimodality refers to a model's ability to simultaneously understand and process information from various sources, including text, pdf, images, data formats. 
-
-=== Images
-Currently, Anthropic Claude 3 supports the `base64` source type for `images`, and the `image/jpeg`, `image/png`, `image/gif`, and `image/webp` media types.
-Check the link:https://docs.anthropic.com/claude/docs/vision[Vision guide] for more information.
-Anthropic Claude 3.5 Sonnet also supports the `pdf` source type for `application/pdf` files.
-
-Spring AI's `Message` interface supports multimodal AI models by introducing the Media type.
-This type contains data and information about media attachments in messages, using Spring's `org.springframework.util.MimeType` and a `java.lang.Object` for the raw media data.
-
-Below is a simple code example extracted from https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java[AnthropicChatModelIT.java], demonstrating the combination of user text with an image.
-
-[source,java]
-----
-var imageData = new ClassPathResource("/multimodal.test.png");
-
-var userMessage = new UserMessage("Explain what do you see on this picture?",
-        List.of(new Media(MimeTypeUtils.IMAGE_PNG, this.imageData)));
-
-ChatResponse response = chatModel.call(new Prompt(List.of(this.userMessage)));
-
-logger.info(response.getResult().getOutput().getText());
-----
-
-It takes as an input the `multimodal.test.png` image:
-
-image::multimodal.test.png[Multimodal Test Image, 200, 200, align="left"]
-
-along with the text message "Explain what do you see on this picture?", and generates a response something like:
-
-----
-The image shows a close-up view of a wire fruit basket containing several pieces of fruit.
-...
-----
-
-=== PDF
-
-Starting with Sonnet 3.5 https://docs.anthropic.com/en/docs/build-with-claude/pdf-support[PDF support (beta)] is provided.
-Use the `application/pdf` media type to attach a PDF file to the message:
-
-[source,java]
-----
-var pdfData = new ClassPathResource("/spring-ai-reference-overview.pdf");
-
-var userMessage = new UserMessage(
-        "You are a very professional document summarization specialist. Please summarize the given document.",
-        List.of(new Media(new MimeType("application", "pdf"), pdfData)));
-
-var response = this.chatModel.call(new Prompt(List.of(userMessage)));
-----
-
-== Citations
+== Citations
 
 Anthropic's https://docs.anthropic.com/en/docs/build-with-claude/citations[Citations API] allows Claude to reference specific parts of provided documents when generating responses.
 When citation documents are included in a prompt, Claude can cite the source material, and citation metadata (character ranges, page numbers, or content blocks) is returned in the response metadata.
@@ -1147,13 +521,13 @@ Three types of citation documents are supported:
 
 === Creating Citation Documents
 
-Use the `CitationDocument` builder to create documents that can be cited:
+Use the `AnthropicCitationDocument` builder to create documents that can be cited:
 
 ==== Plain Text Documents
 
 [source,java]
 ----
-CitationDocument document = CitationDocument.builder()
+AnthropicCitationDocument document = AnthropicCitationDocument.builder()
     .plainText("The Eiffel Tower was completed in 1889 in Paris, France. " +
                "It stands 330 meters tall and was designed by Gustave Eiffel.")
     .title("Eiffel Tower Facts")
@@ -1166,7 +540,7 @@ CitationDocument document = CitationDocument.builder()
 [source,java]
 ----
 // From file path
-CitationDocument document = CitationDocument.builder()
+AnthropicCitationDocument document = AnthropicCitationDocument.builder()
     .pdfFile("path/to/document.pdf")
     .title("Technical Specification")
     .citationsEnabled(true)
@@ -1174,7 +548,7 @@ CitationDocument document = CitationDocument.builder()
 
 // From byte array
 byte[] pdfBytes = loadPdfBytes();
-CitationDocument document = CitationDocument.builder()
+AnthropicCitationDocument document = AnthropicCitationDocument.builder()
     .pdf(pdfBytes)
     .title("Product Manual")
     .citationsEnabled(true)
@@ -1187,7 +561,7 @@ For fine-grained citation control, use custom content blocks:
 
 [source,java]
 ----
-CitationDocument document = CitationDocument.builder()
+AnthropicCitationDocument document = AnthropicCitationDocument.builder()
     .customContent(
         "The Great Wall of China is approximately 21,196 kilometers long.",
         "It was built over many centuries, starting in the 7th century BC.",
@@ -1208,7 +582,7 @@ ChatResponse response = chatModel.call(
     new Prompt(
         "When was the Eiffel Tower built and how tall is it?",
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
+            .model("claude-sonnet-4-20250514")
             .maxTokens(1024)
             .citationDocuments(document)
             .build()
@@ -1222,13 +596,13 @@ You can provide multiple documents for Claude to reference:
 
 [source,java]
 ----
-CitationDocument parisDoc = CitationDocument.builder()
+AnthropicCitationDocument parisDoc = AnthropicCitationDocument.builder()
     .plainText("Paris is the capital city of France with a population of 2.1 million.")
     .title("Paris Information")
     .citationsEnabled(true)
     .build();
 
-CitationDocument eiffelDoc = CitationDocument.builder()
+AnthropicCitationDocument eiffelDoc = AnthropicCitationDocument.builder()
     .plainText("The Eiffel Tower was designed by Gustave Eiffel for the 1889 World's Fair.")
     .title("Eiffel Tower History")
     .citationsEnabled(true)
@@ -1238,7 +612,7 @@ ChatResponse response = chatModel.call(
     new Prompt(
         "What is the capital of France and who designed the Eiffel Tower?",
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
+            .model("claude-sonnet-4-20250514")
             .citationDocuments(parisDoc, eiffelDoc)
             .build()
     )
@@ -1325,7 +699,7 @@ Here's a complete example demonstrating citation usage:
 [source,java]
 ----
 // Create a citation document
-CitationDocument document = CitationDocument.builder()
+AnthropicCitationDocument document = AnthropicCitationDocument.builder()
     .plainText("Spring AI is an application framework for AI engineering. " +
                "It provides a Spring-friendly API for developing AI applications. " +
                "The framework includes abstractions for chat models, embedding models, " +
@@ -1339,7 +713,7 @@ ChatResponse response = chatModel.call(
     new Prompt(
         "What is Spring AI?",
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
+            .model("claude-sonnet-4-20250514")
             .maxTokens(1024)
             .citationDocuments(document)
             .build()
@@ -1373,97 +747,6 @@ if (citations != null && !citations.isEmpty()) {
 4. **Leverage multiple documents**: When answering questions that span multiple sources, provide all relevant documents in a single request rather than making multiple calls.
 5. **Use appropriate document types**: Choose plain text for simple content, PDF for existing documents, and custom content blocks when you need fine-grained control over citation granularity.
 
-=== Real-World Use Cases
-
-==== Legal Document Analysis
-
-Analyze contracts and legal documents while maintaining source attribution:
-
-[source,java]
-----
-CitationDocument contract = CitationDocument.builder()
-    .pdfFile("merger-agreement.pdf")
-    .title("Merger Agreement 2024")
-    .citationsEnabled(true)
-    .build();
-
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "What are the key termination clauses in this contract?",
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .maxTokens(2000)
-            .citationDocuments(contract)
-            .build()
-    )
-);
-
-// Citations will reference specific pages in the PDF
-----
-
-==== Customer Support Knowledge Base
-
-Provide accurate customer support answers with verifiable sources:
-
-[source,java]
-----
-CitationDocument kbArticle1 = CitationDocument.builder()
-    .plainText(loadKnowledgeBaseArticle("authentication"))
-    .title("Authentication Guide")
-    .citationsEnabled(true)
-    .build();
-
-CitationDocument kbArticle2 = CitationDocument.builder()
-    .plainText(loadKnowledgeBaseArticle("billing"))
-    .title("Billing FAQ")
-    .citationsEnabled(true)
-    .build();
-
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "How do I reset my password and update my billing information?",
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .citationDocuments(kbArticle1, kbArticle2)
-            .build()
-    )
-);
-
-// Citations show which KB articles were referenced
-----
-
-==== Research and Compliance
-
-Generate reports that require source citations for compliance:
-
-[source,java]
-----
-CitationDocument clinicalStudy = CitationDocument.builder()
-    .pdfFile("clinical-trial-results.pdf")
-    .title("Clinical Trial Phase III Results")
-    .citationsEnabled(true)
-    .build();
-
-CitationDocument regulatoryGuidance = CitationDocument.builder()
-    .plainText(loadRegulatoryDocument())
-    .title("FDA Guidance Document")
-    .citationsEnabled(true)
-    .build();
-
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "Summarize the efficacy findings and regulatory implications.",
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .maxTokens(3000)
-            .citationDocuments(clinicalStudy, regulatoryGuidance)
-            .build()
-    )
-);
-
-// Citations provide audit trail for compliance
-----
-
 === Citation Document Options
 
 ==== Context Field
@@ -1472,7 +755,7 @@ Optionally provide context about the document that won't be cited but can guide
 
 [source,java]
 ----
-CitationDocument document = CitationDocument.builder()
+AnthropicCitationDocument document = AnthropicCitationDocument.builder()
     .plainText("...")
     .title("Legal Contract")
     .context("This is a merger agreement dated January 2024 between Company A and Company B")
@@ -1486,7 +769,7 @@ To enable citations, explicitly set `citationsEnabled(true)`:
 
 [source,java]
 ----
-CitationDocument document = CitationDocument.builder()
+AnthropicCitationDocument document = AnthropicCitationDocument.builder()
     .plainText("The Eiffel Tower was completed in 1889...")
     .title("Historical Facts")
     .citationsEnabled(true)  // Explicitly enable citations for this document
@@ -1497,7 +780,7 @@ You can also provide documents without citations for background context:
 
 [source,java]
 ----
-CitationDocument backgroundDoc = CitationDocument.builder()
+AnthropicCitationDocument backgroundDoc = AnthropicCitationDocument.builder()
     .plainText("Background information about the industry...")
     .title("Context Document")
     // citationsEnabled defaults to false - Claude will use this but not cite it
@@ -1510,818 +793,622 @@ Anthropic requires consistent citation settings across all documents in a reques
 You cannot mix citation-enabled and citation-disabled documents in the same request.
 ====
 
-== Skills
+== Prompt Caching
 
-Anthropic's https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview[Skills API] extends Claude's capabilities with specialized, pre-packaged abilities for document generation.
-Skills enable Claude to create actual downloadable files - Excel spreadsheets, PowerPoint presentations, Word documents, and PDFs - rather than just describing what these documents might contain.
+Anthropic's https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching[Prompt Caching] reduces costs and latency by caching repeated context across API calls. The Anthropic SDK module supports prompt caching with configurable strategies, TTL, and per-message-type settings.
 
-Skills solve a fundamental limitation of traditional LLMs:
+=== Caching Strategies
 
-* **Traditional Claude**: "Here's how your sales report would look..." (text description only)
-* **With Skills**: Creates an actual `sales_report.xlsx` file you can download and open in Excel
+Five caching strategies are available via `AnthropicCacheStrategy`:
 
-[NOTE]
-====
-*Supported Models*
+[cols="2,5", stripes=even]
+|====
+| Strategy | Description
 
-Skills are supported on Claude Sonnet 4, Claude Sonnet 4.5, Claude Opus 4, and later models.
+| `NONE` | No caching (default). No `cache_control` markers are added to the request.
+| `SYSTEM_ONLY` | Cache system message content. Uses 1 cache breakpoint.
+| `TOOLS_ONLY` | Cache tool definitions only. Uses 1 cache breakpoint.
+| `SYSTEM_AND_TOOLS` | Cache both system messages and tool definitions. Uses 2 cache breakpoints.
+| `CONVERSATION_HISTORY` | Cache system messages, tool definitions, and conversation messages. Uses up to 4 cache breakpoints.
+|====
 
-*Requirements*
+NOTE: Anthropic allows a maximum of 4 cache breakpoints per request. The implementation tracks breakpoint usage and stops adding cache control once the limit is reached.
 
-* Skills require the code execution capability (automatically enabled by Spring AI)
-* Maximum of 8 skills per request
-* Generated files are available for download via the Files API for 24 hours
-====
+=== Basic Usage
 
-=== Pre-built Anthropic Skills
+[source,java]
+----
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .maxTokens(1024)
+    .cacheOptions(AnthropicCacheOptions.builder()
+        .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+        .build())
+    .build();
 
-Spring AI provides type-safe access to Anthropic's pre-built skills through the `AnthropicSkill` enum:
+ChatResponse response = chatModel.call(
+    new Prompt(List.of(
+        new SystemMessage("You are an expert assistant with deep domain knowledge..."),
+        new UserMessage("What is the capital of France?")),
+        options));
+----
 
-[cols="2,3,4", stripes=even]
-|====
-| Skill | Description | Generated File Type
-
-| `XLSX`
-| Excel spreadsheet generation and manipulation
-| `.xlsx` (Microsoft Excel)
+=== Cache Configuration Options
 
-| `PPTX`
-| PowerPoint presentation creation
-| `.pptx` (Microsoft PowerPoint)
-
-| `DOCX`
-| Word document generation
-| `.docx` (Microsoft Word)
-
-| `PDF`
-| PDF document creation
-| `.pdf` (Portable Document Format)
-|====
-
-=== Basic Usage
-
-Enable skills by adding them to your `AnthropicChatOptions`:
+`AnthropicCacheOptions` provides fine-grained control over caching behavior:
 
 [source,java]
 ----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "Create an Excel spreadsheet with Q1 2025 sales data. " +
-        "Include columns for Month, Revenue, and Expenses with 3 rows of sample data.",
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-5")
-            .maxTokens(4096)
-            .skill(AnthropicApi.AnthropicSkill.XLSX)
-            .build()
-    )
-);
-
-// Claude will generate an actual Excel file
-String responseText = response.getResult().getOutput().getText();
-System.out.println(responseText);
-// Output: "I've created an Excel spreadsheet with your Q1 2025 sales data..."
+var cacheOptions = AnthropicCacheOptions.builder()
+    .strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
+    .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)     // 1 hour TTL
+    .messageTypeMinContentLength(MessageType.SYSTEM, 100)                   // Min 100 chars
+    .multiBlockSystemCaching(true)                                          // Per-block caching
+    .build();
 ----
 
-=== Multiple Skills
-
-You can enable multiple skills in a single request (up to 8):
+[cols="3,5,1", stripes=even]
+|====
+| Option | Description | Default
 
-[source,java]
-----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        "Create a sales report with both an Excel file containing the raw data " +
-        "and a PowerPoint presentation summarizing the key findings.",
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-5")
-            .maxTokens(8192)
-            .skill(AnthropicApi.AnthropicSkill.XLSX)
-            .skill(AnthropicApi.AnthropicSkill.PPTX)
-            .build()
-    )
-);
-----
+| `strategy` | The caching strategy to use. | `NONE`
+| `messageTypeTtl` | TTL per message type. Available values: `FIVE_MINUTES`, `ONE_HOUR`. | `FIVE_MINUTES` for all types
+| `messageTypeMinContentLength` | Minimum content length required before caching a message type. | `1`
+| `contentLengthFunction` | Custom function to compute content length (e.g., token counting). | `String::length`
+| `multiBlockSystemCaching` | When `true`, each system message becomes a separate cacheable block; cache control is applied to the second-to-last block (static prefix pattern). When `false`, all system messages are joined into one block. | `false`
+|====
 
-=== Using SkillContainer for Advanced Configuration
+=== Multi-Block System Caching
 
-For more control, use `SkillContainer` directly:
+When you have both a static system prompt and dynamic instructions, use multi-block system caching to cache only the static portion:
 
 [source,java]
 ----
-AnthropicApi.SkillContainer container = AnthropicApi.SkillContainer.builder()
-    .skill(AnthropicApi.AnthropicSkill.XLSX)
-    .skill(AnthropicApi.AnthropicSkill.PPTX, "20251013") // Specific version
+var cacheOptions = AnthropicCacheOptions.builder()
+    .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+    .multiBlockSystemCaching(true)
     .build();
 
 ChatResponse response = chatModel.call(
-    new Prompt(
-        "Generate the quarterly report",
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-5")
-            .maxTokens(4096)
-            .skillContainer(container)
-            .build()
-    )
-);
-----
-
-=== Using ChatClient Fluent API
-
-Skills work seamlessly with the ChatClient fluent API:
-
-[source,java]
-----
-String response = ChatClient.create(chatModel)
-    .prompt()
-    .user("Create a PowerPoint presentation about Spring AI with 3 slides: " +
-          "Title, Key Features, and Getting Started")
-    .options(AnthropicChatOptions.builder()
-        .model("claude-sonnet-4-5")
-        .maxTokens(4096)
-        .skill(AnthropicApi.AnthropicSkill.PPTX)
-        .build())
-    .call()
-    .content();
-----
-
-=== Streaming with Skills
-
-Skills work with streaming responses:
-
-[source,java]
-----
-Flux<ChatResponse> responseFlux = chatModel.stream(
-    new Prompt(
-        "Create a Word document explaining machine learning concepts",
+    new Prompt(List.of(
+        new SystemMessage("You are an expert knowledge base assistant..."),  // Static (cached)
+        new SystemMessage("Today's date is 2025-02-23. User timezone: PST"), // Dynamic
+        new UserMessage("What are the latest updates?")),
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-5")
-            .maxTokens(4096)
-            .skill(AnthropicApi.AnthropicSkill.DOCX)
-            .build()
-    )
-);
-
-responseFlux.subscribe(response -> {
-    String content = response.getResult().getOutput().getText();
-    System.out.print(content);
-});
+            .model("claude-sonnet-4-20250514")
+            .cacheOptions(cacheOptions)
+            .build()));
 ----
 
-=== Downloading Generated Files
+=== Accessing Cache Token Usage
 
-When Claude generates files using Skills, the response contains file IDs that can be used to download the actual files via the Files API.
-Spring AI provides the `AnthropicSkillsResponseHelper` utility class for extracting file IDs and downloading files.
-
-==== Extracting File IDs
+Cache token metrics are available through the native SDK `Usage` object:
 
 [source,java]
 ----
-import org.springframework.ai.anthropic.AnthropicSkillsResponseHelper;
-
 ChatResponse response = chatModel.call(prompt);
 
-// Extract all file IDs from the response
-List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
+com.anthropic.models.messages.Usage sdkUsage =
+    (com.anthropic.models.messages.Usage) response.getMetadata().getUsage().getNativeUsage();
+long cacheCreation = sdkUsage.cacheCreationInputTokens().orElse(0L);
+long cacheRead = sdkUsage.cacheReadInputTokens().orElse(0L);
 
-for (String fileId : fileIds) {
-    System.out.println("Generated file ID: " + fileId);
-}
+System.out.println("Cache creation tokens: " + cacheCreation);
+System.out.println("Cache read tokens: " + cacheRead);
 ----
 
-==== Getting File Metadata
+On the first request, `cacheCreationInputTokens` will be non-zero (tokens written to cache). On subsequent requests with the same cached prefix, `cacheReadInputTokens` will be non-zero (tokens read from cache at reduced cost).
 
-Before downloading, you can retrieve file metadata:
+=== Conversation History Caching
+
+The `CONVERSATION_HISTORY` strategy caches the entire conversation context: system messages, tool definitions, and the conversation messages up to and including the last user message. This is useful for multi-turn conversations where the growing context would otherwise be re-processed on every request:
 
 [source,java]
 ----
-@Autowired
-private AnthropicApi anthropicApi;
-
-// Get metadata for a specific file
-String fileId = fileIds.get(0);
-AnthropicApi.FileMetadata metadata = anthropicApi.getFileMetadata(fileId);
+var cacheOptions = AnthropicCacheOptions.builder()
+    .strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
+    .build();
 
-System.out.println("Filename: " + metadata.filename());     // e.g., "sales_report.xlsx"
-System.out.println("Size: " + metadata.size() + " bytes");  // e.g., 5082
-System.out.println("MIME Type: " + metadata.mimeType());    // e.g., "application/vnd..."
-----
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-20250514")
+    .cacheOptions(cacheOptions)
+    .build();
 
-==== Downloading File Content
+// First turn
+ChatResponse response1 = chatModel.call(
+    new Prompt(List.of(
+        new SystemMessage("You are a helpful assistant."),
+        new UserMessage("What is machine learning?")),
+        options));
 
-[source,java]
+// Second turn - previous context is cached
+ChatResponse response2 = chatModel.call(
+    new Prompt(List.of(
+        new SystemMessage("You are a helpful assistant."),
+        new UserMessage("What is machine learning?"),
+        new AssistantMessage(response1.getResult().getOutput().getText()),
+        new UserMessage("Can you give me an example?")),
+        options));
 ----
-// Download file content as bytes
-byte[] fileContent = anthropicApi.downloadFile(fileId);
 
-// Save to local file system
-Path outputPath = Path.of("downloads", metadata.filename());
-Files.write(outputPath, fileContent);
+== Structured Output
 
-System.out.println("Saved file to: " + outputPath);
-----
+Structured output constrains Claude to produce responses conforming to a JSON schema. The Anthropic SDK module also supports Anthropic's effort control for tuning response quality versus speed.
 
-==== Convenience Method: Download All Files
+[NOTE]
+====
+*Model Requirement*
 
-The `AnthropicSkillsResponseHelper` provides a convenience method to download all generated files at once:
+Structured output and effort control require `claude-sonnet-4-6` or newer. Older models like `claude-sonnet-4-20250514` do not support these features.
 
-[source,java]
-----
-// Download all files to a target directory
-Path targetDir = Path.of("generated-files");
-Files.createDirectories(targetDir);
+*Schema Requirements*
 
-List<Path> savedFiles = AnthropicSkillsResponseHelper.downloadAllFiles(response, anthropicApi, targetDir);
-
-for (Path file : savedFiles) {
-    System.out.println("Downloaded: " + file.getFileName() +
-                       " (" + Files.size(file) + " bytes)");
-}
-----
+When using JSON schema output, Anthropic requires `"additionalProperties": false` for all object types in the schema.
+====
 
-==== Complete File Download Example
+=== JSON Schema Output
 
-Here's a complete example showing Skills usage with file download:
+Constrain Claude's responses to a specific JSON schema using the `outputSchema` convenience method:
 
 [source,java]
 ----
-@Service
-public class DocumentGenerationService {
-
-    private final AnthropicChatModel chatModel;
-    private final AnthropicApi anthropicApi;
-
-    public DocumentGenerationService(AnthropicChatModel chatModel, AnthropicApi anthropicApi) {
-        this.chatModel = chatModel;
-        this.anthropicApi = anthropicApi;
-    }
-
-    public Path generateSalesReport(String quarter, Path outputDir) throws IOException {
-        // Generate Excel report using Skills
-        ChatResponse response = chatModel.call(
-            new Prompt(
-                "Create an Excel spreadsheet with " + quarter + " sales data. " +
-                "Include Month, Revenue, Expenses, and Profit columns.",
-                AnthropicChatOptions.builder()
-                    .model("claude-sonnet-4-5")
-                    .maxTokens(4096)
-                    .skill(AnthropicApi.AnthropicSkill.XLSX)
-                    .build()
-            )
-        );
-
-        // Extract file IDs from the response
-        List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-
-        if (fileIds.isEmpty()) {
-            throw new RuntimeException("No file was generated");
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-6")
+    .outputSchema("""
+        {
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "capital": {"type": "string"},
+                "population": {"type": "integer"}
+            },
+            "required": ["name", "capital"],
+            "additionalProperties": false
         }
+        """)
+    .build();
 
-        // Download the generated file
-        String fileId = fileIds.get(0);
-        AnthropicApi.FileMetadata metadata = anthropicApi.getFileMetadata(fileId);
-        byte[] content = anthropicApi.downloadFile(fileId);
-
-        // Save to output directory
-        Path outputPath = outputDir.resolve(metadata.filename());
-        Files.write(outputPath, content);
-
-        return outputPath;
-    }
-}
+ChatResponse response = chatModel.call(new Prompt("Tell me about France.", options));
+// Response text will be valid JSON conforming to the schema
 ----
 
-=== Files API Operations
+=== Effort Control
 
-The `AnthropicApi` provides direct access to the Files API:
+Control how much compute Claude spends on its response. Lower effort means faster, cheaper responses; higher effort means more thorough reasoning.
 
-[cols="2,4", stripes=even]
+[cols="2,5", stripes=even]
 |====
-| Method | Description
-
-| `getFileMetadata(fileId)`
-| Get metadata including filename, size, MIME type, and expiration time
+| Effort Level | Description
 
-| `downloadFile(fileId)`
-| Download file content as byte array
-
-| `listFiles(limit, page)`
-| List files with pagination support
-
-| `deleteFile(fileId)`
-| Delete a file immediately (files auto-expire after 24 hours)
+| `LOW` | Fast and concise responses with minimal reasoning
+| `MEDIUM` | Balanced trade-off between speed and thoroughness
+| `HIGH` | More thorough reasoning and detailed responses
+| `MAX` | Maximum compute for the most thorough possible responses
 |====
 
-==== Listing Files
-
 [source,java]
 ----
-// List files with pagination
-AnthropicApi.FilesListResponse files = anthropicApi.listFiles(20, null);
-
-for (AnthropicApi.FileMetadata file : files.data()) {
-    System.out.println(file.id() + ": " + file.filename());
-}
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-6")
+    .effort(OutputConfig.Effort.LOW)
+    .build();
 
-// Check for more pages
-if (files.hasMore()) {
-    AnthropicApi.FilesListResponse nextPage = anthropicApi.listFiles(20, files.nextPage());
-    // Process next page...
-}
+ChatResponse response = chatModel.call(new Prompt("What is the capital of France?", options));
 ----
 
-==== Extracting Container ID
+=== Combined Schema and Effort
 
-For multi-turn conversations with Skills, you may need to extract the container ID:
+You can combine JSON schema output with effort control:
 
 [source,java]
 ----
-String containerId = AnthropicSkillsResponseHelper.extractContainerId(response);
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-6")
+    .outputSchema("""
+        {
+            "type": "object",
+            "properties": {
+                "answer": {"type": "integer"},
+                "explanation": {"type": "string"}
+            },
+            "required": ["answer", "explanation"],
+            "additionalProperties": false
+        }
+        """)
+    .effort(OutputConfig.Effort.HIGH)
+    .build();
 
-if (containerId != null) {
-    System.out.println("Container ID for reuse: " + containerId);
-}
+ChatResponse response = chatModel.call(
+    new Prompt("What is 15 * 23? Show your reasoning.", options));
 ----
 
-=== Best Practices
-
-1. **Use appropriate models**: Skills work best with Claude Sonnet 4 and later models. Ensure you're using a supported model.
-
-2. **Set sufficient max tokens**: Document generation can require significant tokens. Use `maxTokens(4096)` or higher for complex documents.
+=== Direct OutputConfig
 
-3. **Be specific in prompts**: Provide clear, detailed instructions about document structure, content, and formatting.
+For full control, use the SDK's `OutputConfig` directly:
 
-4. **Handle file downloads promptly**: Generated files expire after 24 hours. Download files soon after generation.
+[source,java]
+----
+import com.anthropic.models.messages.OutputConfig;
+import com.anthropic.models.messages.JsonOutputFormat;
+import com.anthropic.core.JsonValue;
 
-5. **Check for file IDs**: Always verify that file IDs were returned before attempting downloads. Some prompts may result in text responses without file generation.
+var outputConfig = OutputConfig.builder()
+    .effort(OutputConfig.Effort.HIGH)
+    .format(JsonOutputFormat.builder()
+        .schema(JsonOutputFormat.Schema.builder()
+            .putAdditionalProperty("type", JsonValue.from("object"))
+            .putAdditionalProperty("properties", JsonValue.from(Map.of(
+                "name", Map.of("type", "string"))))
+            .putAdditionalProperty("additionalProperties", JsonValue.from(false))
+            .build())
+        .build())
+    .build();
 
-6. **Use defensive error handling**: Wrap file operations in try-catch blocks to handle network issues or expired files gracefully.
+var options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4-6")
+    .outputConfig(outputConfig)
+    .build();
 
-[source,java]
+ChatResponse response = chatModel.call(new Prompt("Tell me about France.", options));
 ----
-List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
 
-if (fileIds.isEmpty()) {
-    // Claude may have responded with text instead of generating a file
-    String text = response.getResult().getOutput().getText();
-    log.warn("No files generated. Response: {}", text);
-    return;
-}
+=== StructuredOutputChatOptions Interface
 
-try {
-    byte[] content = anthropicApi.downloadFile(fileIds.get(0));
-    // Process file...
-} catch (Exception e) {
-    log.error("Failed to download file: {}", e.getMessage());
-}
-----
+`AnthropicChatOptions` implements the `StructuredOutputChatOptions` interface, which provides portable `getOutputSchema()` and `setOutputSchema(String)` methods. This allows structured output to work with Spring AI's generic structured output infrastructure.
 
-=== Real-World Use Cases
+== Per-Request HTTP Headers
 
-==== Automated Report Generation
+The Anthropic SDK module supports per-request HTTP headers, which are injected into individual API calls. This is distinct from `customHeaders` (which are set at the client level for all requests).
 
-Generate formatted business reports from data:
+Per-request headers are useful for:
+
+* **Request tracking**: Adding correlation IDs or trace headers per request
+* **Beta API access**: Including beta feature headers for specific requests
+* **Routing**: Adding routing or priority headers for load balancing
 
 [source,java]
 ----
-@Service
-public class ReportService {
-
-    private final AnthropicChatModel chatModel;
-    private final AnthropicApi anthropicApi;
-
-    public byte[] generateMonthlyReport(SalesData data) throws IOException {
-        String prompt = String.format(
-            "Create a PowerPoint presentation summarizing monthly sales performance. " +
-            "Total Revenue: $%,.2f, Total Expenses: $%,.2f, Net Profit: $%,.2f. " +
-            "Include charts and key insights. Create 5 slides: " +
-            "1) Title, 2) Revenue Overview, 3) Expense Breakdown, " +
-            "4) Profit Analysis, 5) Recommendations.",
-            data.revenue(), data.expenses(), data.profit()
-        );
-
-        ChatResponse response = chatModel.call(
-            new Prompt(prompt,
-                AnthropicChatOptions.builder()
-                    .model("claude-sonnet-4-5")
-                    .maxTokens(8192)
-                    .skill(AnthropicApi.AnthropicSkill.PPTX)
-                    .build()
-            )
-        );
+var options = AnthropicChatOptions.builder()
+    .httpHeaders(Map.of(
+        "X-Request-Id", "req-12345",
+        "X-Custom-Tracking", "my-tracking-value"))
+    .build();
 
-        List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-        return anthropicApi.downloadFile(fileIds.get(0));
-    }
-}
+ChatResponse response = chatModel.call(new Prompt("Hello", options));
 ----
 
-==== Data Export Service
+NOTE: `httpHeaders` are per-request and set via `MessageCreateParams.putAdditionalHeader()`. They do not affect other requests. For headers that should apply to all requests, use `customHeaders` instead.
+
+== Sample Controller
 
-Export structured data to Excel format:
+Here is an example of a simple `@RestController` class that uses the chat model for text generation:
 
 [source,java]
 ----
 @RestController
-public class ExportController {
+public class ChatController {
 
     private final AnthropicChatModel chatModel;
-    private final AnthropicApi anthropicApi;
-    private final CustomerRepository customerRepository;
 
-    @GetMapping("/export/customers")
-    public ResponseEntity<byte[]> exportCustomers() throws IOException {
-        List<Customer> customers = customerRepository.findAll();
-
-        String dataDescription = customers.stream()
-            .map(c -> String.format("%s, %s, %s", c.name(), c.email(), c.tier()))
-            .collect(Collectors.joining("\n"));
+    public ChatController() {
+        var options = AnthropicChatOptions.builder()
+            .model("claude-sonnet-4-20250514")
+            .maxTokens(1024)
+            .apiKey(System.getenv("ANTHROPIC_API_KEY"))
+            .build();
+        this.chatModel = new AnthropicChatModel(options);
+    }
 
-        ChatResponse response = chatModel.call(
-            new Prompt(
-                "Create an Excel spreadsheet with customer data. " +
-                "Columns: Name, Email, Tier. Format the header row with bold text. " +
-                "Data:\n" + dataDescription,
-                AnthropicChatOptions.builder()
-                    .model("claude-sonnet-4-5")
-                    .maxTokens(4096)
-                    .skill(AnthropicApi.AnthropicSkill.XLSX)
-                    .build()
-            )
-        );
+    @GetMapping("/ai/generate")
+    public Map<String, String> generate(
+            @RequestParam(value = "message", defaultValue = "Tell me a joke") String message) {
+        return Map.of("generation", chatModel.call(message));
+    }
 
-        List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-        byte[] content = anthropicApi.downloadFile(fileIds.get(0));
-        AnthropicApi.FileMetadata metadata = anthropicApi.getFileMetadata(fileIds.get(0));
-
-        return ResponseEntity.ok()
-            .header(HttpHeaders.CONTENT_DISPOSITION,
-                    "attachment; filename=\"" + metadata.filename() + "\"")
-            .contentType(MediaType.parseMediaType(metadata.mimeType()))
-            .body(content);
+    @GetMapping("/ai/generateStream")
+    public Flux<ChatResponse> generateStream(
+            @RequestParam(value = "message", defaultValue = "Tell me a joke") String message) {
+        Prompt prompt = new Prompt(new UserMessage(message));
+        return chatModel.stream(prompt);
     }
 }
 ----
 
-==== Multi-Format Document Generation
+== Accessing the Raw Response
 
-Generate multiple document formats from a single request:
+The full Anthropic SDK `Message` object is available in the response metadata under the `"anthropic-response"` key. This provides access to any fields not explicitly mapped by Spring AI's abstraction:
 
 [source,java]
 ----
-public Map<String, byte[]> generateProjectDocumentation(ProjectInfo project) throws IOException {
-    ChatResponse response = chatModel.call(
-        new Prompt(
-            "Create project documentation for: " + project.name() + "\n" +
-            "Description: " + project.description() + "\n\n" +
-            "Generate:\n" +
-            "1. An Excel file with the project timeline and milestones\n" +
-            "2. A PowerPoint overview presentation (3-5 slides)\n" +
-            "3. A Word document with detailed specifications",
-            AnthropicChatOptions.builder()
-                .model("claude-sonnet-4-5")
-                .maxTokens(16384)
-                .skill(AnthropicApi.AnthropicSkill.XLSX)
-                .skill(AnthropicApi.AnthropicSkill.PPTX)
-                .skill(AnthropicApi.AnthropicSkill.DOCX)
-                .build()
-        )
-    );
-
-    Map<String, byte[]> documents = new HashMap<>();
-    List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
-
-    for (String fileId : fileIds) {
-        AnthropicApi.FileMetadata metadata = anthropicApi.getFileMetadata(fileId);
-        byte[] content = anthropicApi.downloadFile(fileId);
-        documents.put(metadata.filename(), content);
-    }
+ChatResponse response = chatModel.call(new Prompt("Hello"));
 
-    return documents;
-}
+com.anthropic.models.messages.Message rawMessage =
+    (com.anthropic.models.messages.Message) response.getMetadata().get("anthropic-response");
+
+// Access native SDK fields
+rawMessage.stopReason();    // Optional<StopReason>
+rawMessage.content();       // List<ContentBlock>
+rawMessage.usage();         // Usage with cache token details
 ----
 
-=== Combining Skills with Other Features
+NOTE: The raw response is available for synchronous calls only. Streaming responses do not include it.
 
-Skills can be combined with other Anthropic features like Prompt Caching:
+== Skills
 
-[source,java]
-----
-ChatResponse response = chatModel.call(
-    new Prompt(
-        List.of(
-            new SystemMessage("You are an expert data analyst and document creator..."),
-            new UserMessage("Create a financial summary spreadsheet")
-        ),
-        AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-5")
-            .maxTokens(4096)
-            .skill(AnthropicApi.AnthropicSkill.XLSX)
-            .cacheOptions(AnthropicCacheOptions.builder()
-                .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
-                .build())
-            .build()
-    )
-);
-----
+Anthropic's https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview[Skills API] extends Claude's capabilities with specialized, pre-packaged abilities for document generation.
+Skills enable Claude to create actual downloadable files -- Excel spreadsheets, PowerPoint presentations, Word documents, and PDFs -- rather than just describing what these documents might contain.
 
-=== Custom Skills
+[NOTE]
+====
+*Supported Models*
 
-In addition to pre-built skills, Anthropic supports custom skills that you can create for specialized document templates, formatting rules, or domain-specific behaviors.
-Custom skills are `SKILL.md` files with instructions that you upload to your Anthropic workspace.
-Once uploaded, you can use them in Spring AI alongside pre-built skills.
+Skills are supported on Claude Sonnet 4, Claude Sonnet 4.5, Claude Opus 4, and later models.
 
-Custom skills are ideal for:
+*Requirements*
 
-* **Corporate branding**: Apply consistent headers, footers, logos, and color schemes
-* **Compliance requirements**: Add required disclaimers, confidentiality notices, or audit trails
-* **Document templates**: Enforce specific structures for reports, proposals, or specifications
-* **Domain expertise**: Include industry-specific terminology, calculations, or formatting rules
+* Skills require the code execution capability (automatically enabled by Spring AI when skills are configured)
+* Maximum of 8 skills per request
+* Generated files are available for download via the Files API for 24 hours
+====
 
-For details on creating custom skills, refer to the https://platform.claude.com/docs/en/api/skills-guide[Anthropic Skills API documentation].
+=== Pre-built Anthropic Skills
 
-==== Uploading a Custom Skill
+Spring AI provides type-safe access to Anthropic's pre-built skills through the `AnthropicSkill` enum:
 
-Upload your skill using the Anthropic API.
-Note the specific format requirements for the `files[]` parameter:
+[cols="2,3,4", stripes=even]
+|====
+| Skill | Description | Generated File Type
 
-[source,bash]
-----
-curl -X POST "https://api.anthropic.com/v1/skills" \
-  -H "x-api-key: $ANTHROPIC_API_KEY" \
-  -H "anthropic-version: 2023-06-01" \
-  -H "anthropic-beta: skills-2025-10-02" \
-  -F "display_title=My Custom Skill" \
-  -F "files[]=@SKILL.md;filename=my-skill-name/SKILL.md"
-----
+| `XLSX`
+| Excel spreadsheet generation and manipulation
+| `.xlsx` (Microsoft Excel)
 
-[IMPORTANT]
-====
-* Use `files[]=` (with square brackets), not `files=`
-* The `filename` parameter must include a directory matching the `name` field in your SKILL.md YAML frontmatter
-* After uploading, verify your skill appears in the Anthropic Console under **Settings > Capabilities**
-====
+| `PPTX`
+| PowerPoint presentation creation
+| `.pptx` (Microsoft PowerPoint)
 
-The response contains your skill ID:
+| `DOCX`
+| Word document generation
+| `.docx` (Microsoft Word)
 
-[source,json]
-----
-{
-  "id": "skill_01AbCdEfGhIjKlMnOpQrStUv",
-  "display_title": "My Custom Skill",
-  "source": "custom",
-  "latest_version": "1765845644409101"
-}
-----
+| `PDF`
+| PDF document creation
+| `.pdf` (Portable Document Format)
+|====
 
-==== Using Custom Skills in Spring AI
+=== Basic Usage
 
-Reference your custom skill by its ID using the `.skill()` method:
+Enable skills by adding them to your `AnthropicChatOptions`:
 
 [source,java]
 ----
 ChatResponse response = chatModel.call(
     new Prompt(
-        "Create a quarterly sales report",
+        "Create an Excel spreadsheet with Q1 2025 sales data. " +
+        "Include columns for Month, Revenue, and Expenses with 3 rows of sample data.",
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-5")
+            .model(Model.CLAUDE_SONNET_4_5)
             .maxTokens(4096)
-            .skill("skill_01AbCdEfGhIjKlMnOpQrStUv")
+            .skill(AnthropicSkill.XLSX)
             .build()
     )
 );
+
+// Claude will generate an actual Excel file
+String responseText = response.getResult().getOutput().getText();
+System.out.println(responseText);
+// Output: "I've created an Excel spreadsheet with your Q1 2025 sales data..."
 ----
 
-==== Combining Pre-built and Custom Skills
+=== Multiple Skills
 
-You can use both pre-built and custom skills in the same request.
-This allows you to leverage Anthropic's document generation capabilities while applying your organization's specific requirements:
+You can enable multiple skills in a single request (up to 8):
 
 [source,java]
 ----
 ChatResponse response = chatModel.call(
     new Prompt(
-        "Create a sales report spreadsheet",
+        "Create a sales report with both an Excel file containing the raw data " +
+        "and a PowerPoint presentation summarizing the key findings.",
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-5")
-            .maxTokens(4096)
-            .skill(AnthropicApi.AnthropicSkill.XLSX)       // Pre-built
-            .skill("skill_01AbCdEfGhIjKlMnOpQrStUv")       // Your custom skill
+            .model(Model.CLAUDE_SONNET_4_5)
+            .maxTokens(8192)
+            .skill(AnthropicSkill.XLSX)
+            .skill(AnthropicSkill.PPTX)
             .build()
     )
 );
 ----
 
-==== Using SkillContainer with Custom Skills
+=== Using AnthropicSkillContainer for Advanced Configuration
 
-For more control over skill versions, use `SkillContainer` directly:
+For more control over skill types and versions, use `AnthropicSkillContainer` directly:
 
 [source,java]
 ----
-AnthropicApi.SkillContainer container = AnthropicApi.SkillContainer.builder()
-    .skill(AnthropicApi.AnthropicSkill.XLSX)
-    .skill("skill_01AbCdEfGhIjKlMnOpQrStUv")                    // Uses latest version
-    .skill("skill_02XyZaBcDeFgHiJkLmNoPq", "1765845644409101")  // Specific version
+AnthropicSkillContainer container = AnthropicSkillContainer.builder()
+    .skill(AnthropicSkill.XLSX)
+    .skill(AnthropicSkill.PPTX, "20251013") // Specific version
     .build();
 
 ChatResponse response = chatModel.call(
     new Prompt(
-        "Generate the report",
+        "Generate the quarterly report",
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-5")
-            .maxTokens(8192)
+            .model(Model.CLAUDE_SONNET_4_5)
+            .maxTokens(4096)
             .skillContainer(container)
             .build()
     )
 );
 ----
 
-==== Updating a Custom Skill
-
-To update an existing skill, upload a new version to the `/versions` endpoint:
-
-[source,bash]
-----
-curl -X POST "https://api.anthropic.com/v1/skills/YOUR_SKILL_ID/versions" \
-  -H "x-api-key: $ANTHROPIC_API_KEY" \
-  -H "anthropic-version: 2023-06-01" \
-  -H "anthropic-beta: skills-2025-10-02" \
-  -F "files[]=@SKILL.md;filename=my-skill-name/SKILL.md"
-----
-
-When using `latest` as the version (the default), the new version is picked up automatically.
+=== Downloading Generated Files
 
-==== Complete Custom Skills Example
+When Claude generates files using Skills, the response contains file IDs that can be used to download the actual files via the Files API.
+Spring AI provides the `AnthropicSkillsResponseHelper` utility class for extracting file IDs and downloading files.
 
-Here's a complete example showing a service that optionally applies a custom branding skill:
+==== Extracting File IDs
 
 [source,java]
 ----
-@Service
-public class BrandedDocumentService {
+import org.springframework.ai.anthropic.AnthropicSkillsResponseHelper;
 
-    private static final String BRANDING_SKILL_ID = "skill_01AbCdEfGhIjKlMnOpQrStUv";
+ChatResponse response = chatModel.call(prompt);
 
-    private final AnthropicChatModel chatModel;
-    private final AnthropicApi anthropicApi;
+// Extract all file IDs from the response
+List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
 
-    public BrandedDocumentService(AnthropicChatModel chatModel, AnthropicApi anthropicApi) {
-        this.chatModel = chatModel;
-        this.anthropicApi = anthropicApi;
-    }
+for (String fileId : fileIds) {
+    System.out.println("Generated file ID: " + fileId);
+}
+----
 
-    public byte[] generateReport(String prompt, boolean includeBranding) throws IOException {
-        // Build options with document skill
-        AnthropicChatOptions.Builder optionsBuilder = AnthropicChatOptions.builder()
-                .model("claude-sonnet-4-5")
-                .maxTokens(8192)
-                .skill(AnthropicApi.AnthropicSkill.XLSX);
+==== Downloading All Files
 
-        // Add custom branding skill if requested
-        if (includeBranding) {
-            optionsBuilder.skill(BRANDING_SKILL_ID);
-        }
+The `AnthropicSkillsResponseHelper` provides a convenience method to download all generated files at once.
+This requires the `AnthropicClient` instance (the same one used to create the chat model):
 
-        ChatResponse response = chatModel.call(
-            new Prompt(prompt, optionsBuilder.build())
-        );
+[source,java]
+----
+import com.anthropic.client.AnthropicClient;
 
-        // Extract and download the generated file
-        List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
+@Autowired
+private AnthropicClient anthropicClient;
 
-        if (fileIds.isEmpty()) {
-            throw new RuntimeException("No file was generated");
-        }
+// Download all files to a target directory
+Path targetDir = Path.of("generated-files");
+Files.createDirectories(targetDir);
 
-        return anthropicApi.downloadFile(fileIds.get(0));
-    }
+List<Path> savedFiles = AnthropicSkillsResponseHelper.downloadAllFiles(
+        response, anthropicClient, targetDir);
+
+for (Path file : savedFiles) {
+    System.out.println("Downloaded: " + file.getFileName() +
+                       " (" + Files.size(file) + " bytes)");
 }
 ----
 
-== Sample Controller
-
-https://start.spring.io/[Create] a new Spring Boot project and add the `spring-ai-starter-model-anthropic` to your pom (or gradle) dependencies.
+==== Extracting Container ID
 
-Add a `application.properties` file, under the `src/main/resources` directory, to enable and configure the Anthropic chat model:
+For multi-turn conversations with Skills, you can extract the container ID for reuse:
 
-[source,application.properties]
+[source,java]
 ----
-spring.ai.anthropic.api-key=YOUR_API_KEY
-spring.ai.anthropic.chat.options.model=claude-3-5-sonnet-latest
-spring.ai.anthropic.chat.options.temperature=0.7
-spring.ai.anthropic.chat.options.max-tokens=450
+String containerId = AnthropicSkillsResponseHelper.extractContainerId(response);
+
+if (containerId != null) {
+    System.out.println("Container ID for reuse: " + containerId);
+}
 ----
 
-TIP: Replace the `api-key` with your Anthropic credentials.
+=== Complete Example
 
-This will create a `AnthropicChatModel` implementation that you can inject into your class.
-Here is an example of a simple `@Controller` class that uses the chat model for text generations.
+Here's a complete example showing Skills usage with file download:
 
 [source,java]
 ----
-@RestController
-public class ChatController {
+@Service
+public class DocumentGenerationService {
 
     private final AnthropicChatModel chatModel;
+    private final AnthropicClient anthropicClient;
 
-    @Autowired
-    public ChatController(AnthropicChatModel chatModel) {
+    public DocumentGenerationService(AnthropicChatModel chatModel,
+                                     AnthropicClient anthropicClient) {
         this.chatModel = chatModel;
+        this.anthropicClient = anthropicClient;
     }
 
-    @GetMapping("/ai/generate")
-    public Map generate(@RequestParam(value = "message", defaultValue = "Tell me a joke") String message) {
-        return Map.of("generation", this.chatModel.call(message));
-    }
+    public Path generateSalesReport(String quarter, Path outputDir) throws IOException {
+        // Generate Excel report using Skills
+        ChatResponse response = chatModel.call(
+            new Prompt(
+                "Create an Excel spreadsheet with " + quarter + " sales data. " +
+                "Include Month, Revenue, Expenses, and Profit columns.",
+                AnthropicChatOptions.builder()
+                    .model(Model.CLAUDE_SONNET_4_5)
+                    .maxTokens(4096)
+                    .skill(AnthropicSkill.XLSX)
+                    .build()
+            )
+        );
 
-    @GetMapping("/ai/generateStream")
-	public Flux<ChatResponse> generateStream(@RequestParam(value = "message", defaultValue = "Tell me a joke") String message) {
-        Prompt prompt = new Prompt(new UserMessage(message));
-        return this.chatModel.stream(prompt);
+        // Extract file IDs from the response
+        List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
+
+        if (fileIds.isEmpty()) {
+            throw new RuntimeException("No file was generated");
+        }
+
+        // Download all generated files
+        List<Path> savedFiles = AnthropicSkillsResponseHelper.downloadAllFiles(
+                response, anthropicClient, outputDir);
+
+        return savedFiles.get(0);
     }
 }
 ----
 
-== Manual Configuration
-
-The https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java[AnthropicChatModel] implements the `ChatModel` and `StreamingChatModel` and uses the <<low-level-api>> to connect to the Anthropic service.
+=== Best Practices
 
-Add the `spring-ai-anthropic` dependency to your project's Maven `pom.xml` file:
+1. **Use appropriate models**: Skills work best with Claude Sonnet 4 and later models. Ensure you're using a supported model.
 
-[source, xml]
-----
-<dependency>
-    <groupId>org.springframework.ai</groupId>
-    <artifactId>spring-ai-anthropic</artifactId>
-</dependency>
-----
+2. **Set sufficient max tokens**: Document generation can require a significant number of tokens. Use `maxTokens(4096)` or higher for complex documents.
 
-or to your Gradle `build.gradle` build file.
+3. **Be specific in prompts**: Provide clear, detailed instructions about document structure, content, and formatting.
 
-[source,groovy]
-----
-dependencies {
-    implementation 'org.springframework.ai:spring-ai-anthropic'
-}
-----
+4. **Handle file downloads promptly**: Generated files expire after 24 hours. Download files soon after generation.
 
-TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
+5. **Check for file IDs**: Always verify that file IDs were returned before attempting downloads. Some prompts may result in text responses without file generation.
 
-Next, create a `AnthropicChatModel` and use it for text generations:
+6. **Use defensive error handling**: Wrap file operations in try-catch blocks to handle network issues or expired files gracefully.
 
 [source,java]
 ----
-var anthropicApi = new AnthropicApi(System.getenv("ANTHROPIC_API_KEY"));
-var anthropicChatOptions = AnthropicChatOptions.builder()
-            .model("claude-sonnet-4-6")
-            .temperature(0.4)
-            .maxTokens(200)
-        .build()
-var chatModel = AnthropicChatModel.builder().anthropicApi(anthropicApi)
-                .defaultOptions(anthropicChatOptions).build();
+List<String> fileIds = AnthropicSkillsResponseHelper.extractFileIds(response);
 
-ChatResponse response = this.chatModel.call(
-    new Prompt("Generate the names of 5 famous pirates."));
+if (fileIds.isEmpty()) {
+    // Claude may have responded with text instead of generating a file
+    String text = response.getResult().getOutput().getText();
+    log.warn("No files generated. Response: {}", text);
+    return;
+}
 
-// Or with streaming responses
-Flux<ChatResponse> response = this.chatModel.stream(
-    new Prompt("Generate the names of 5 famous pirates."));
+try {
+    List<Path> files = AnthropicSkillsResponseHelper.downloadAllFiles(
+            response, anthropicClient, targetDir);
+    // Process files...
+} catch (IOException e) {
+    log.error("Failed to download file: {}", e.getMessage());
+}
 ----
 
-The `AnthropicChatOptions` provides the configuration information for the chat requests.
-The `AnthropicChatOptions.Builder` is fluent options builder.
-
-== Low-level AnthropicApi Client [[low-level-api]]
-
-The https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java[AnthropicApi] provides is lightweight Java client for link:https://docs.anthropic.com/claude/reference/messages_post[Anthropic Message API].
+== Observability
 
-Following class diagram illustrates the `AnthropicApi` chat interfaces and building blocks:
+The Anthropic SDK implementation supports Spring AI's observability features through Micrometer.
+All chat model operations are instrumented for monitoring and tracing.
 
-image::anthropic-claude3-class-diagram.jpg[AnthropicApi Chat API Diagram, width=1000, align="center"]
+== Logging
 
-image::anthropic-claude3-events-model.jpg[AnthropicApi Event Model, width=1000, align="center"]
+Enable SDK logging by setting the environment variable:
 
-Here is a simple snippet how to use the api programmatically:
-
-[source,java]
+[source,bash]
+----
+export ANTHROPIC_LOG=debug
 ----
-AnthropicApi anthropicApi =
-    new AnthropicApi(System.getenv("ANTHROPIC_API_KEY"));
 
-AnthropicMessage chatCompletionMessage = new AnthropicMessage(
-        List.of(new ContentBlock("Tell me a Joke?")), Role.USER);
+== Limitations
 
-// Sync request
-ResponseEntity<ChatCompletionResponse> response = this.anthropicApi
-    .chatCompletionEntity(new ChatCompletionRequest(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue(),
-            List.of(this.chatCompletionMessage), null, 100, 0.8, false));
+The following features are not yet supported:
 
-// Streaming request
-Flux<StreamResponse> response = this.anthropicApi
-    .chatCompletionStream(new ChatCompletionRequest(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue(),
-            List.of(this.chatCompletionMessage), null, 100, 0.8, true));
-----
+* Amazon Bedrock backend
+* Google Vertex AI backend
+
+These features are planned for future releases.
 
-Follow the https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java[AnthropicApi.java]'s JavaDoc for further information.
+== Additional Resources
 
-=== Low-level API Examples
-* The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/api/AnthropicApiIT.java[AnthropicApiIT.java] test provides some general examples how to use the lightweight library.
+* link:https://github.com/anthropics/anthropic-sdk-java[Official Anthropic Java SDK]
+* link:https://docs.anthropic.com/[Anthropic API Documentation]
+* link:https://docs.anthropic.com/en/docs/about-claude/models[Claude Models]
diff --git a/spring-ai-spring-boot-starters/spring-ai-starter-model-anthropic/pom.xml b/spring-ai-spring-boot-starters/spring-ai-starter-model-anthropic/pom.xml
index f4c0a4fda35..cfe50ff4b3d 100644
--- a/spring-ai-spring-boot-starters/spring-ai-starter-model-anthropic/pom.xml
+++ b/spring-ai-spring-boot-starters/spring-ai-starter-model-anthropic/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  ~ Copyright 2023-present the original author or authors.
+  ~ Copyright 2025-2025 the original author or authors.
   ~
   ~ Licensed under the Apache License, Version 2.0 (the "License");
   ~ you may not use this file except in compliance with the License.
@@ -42,16 +42,6 @@
             <artifactId>spring-boot-starter</artifactId>
         </dependency>
 
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-webclient</artifactId>
-        </dependency>
-
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-restclient</artifactId>
-        </dependency>
-
         <dependency>
             <groupId>org.springframework.ai</groupId>
             <artifactId>spring-ai-autoconfigure-model-anthropic</artifactId>