diff --git a/python/schema/tool_inputs.py b/python/schema/tool_inputs.py index d57d860..964febf 100644 --- a/python/schema/tool_inputs.py +++ b/python/schema/tool_inputs.py @@ -143,6 +143,177 @@ class ErrorTrackingListSchema(BaseModel): status: Status | None = None +class Type(StrEnum): + """ + Experiment type: 'product' for backend/API changes, 'web' for frontend UI changes + """ + + PRODUCT = "product" + WEB = "web" + + +class MetricType(StrEnum): + """ + Metric type: 'mean' for average values (revenue, time spent), 'funnel' for conversion flows, 'ratio' for comparing two metrics + """ + + MEAN = "mean" + FUNNEL = "funnel" + RATIO = "ratio" + + +class PrimaryMetric(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + name: str | None = None + """ + Human-readable metric name + """ + metric_type: MetricType + """ + Metric type: 'mean' for average values (revenue, time spent), 'funnel' for conversion flows, 'ratio' for comparing two metrics + """ + event_name: str + """ + REQUIRED for metrics to work: PostHog event name (e.g., '$pageview', 'add_to_cart', 'purchase'). For funnels, this is the first step. Use '$pageview' if unsure. Search project-property-definitions tool for available events. 
+ """ + funnel_steps: list[str] | None = None + """ + For funnel metrics only: Array of event names for each funnel step (e.g., ['product_view', 'add_to_cart', 'checkout', 'purchase']) + """ + properties: dict[str, Any] | None = None + """ + Event properties to filter on + """ + description: str | None = None + """ + What this metric measures and why it's important for the experiment + """ + + +class MetricType1(StrEnum): + """ + Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics + """ + + MEAN = "mean" + FUNNEL = "funnel" + RATIO = "ratio" + + +class SecondaryMetric(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + name: str | None = None + """ + Human-readable metric name + """ + metric_type: MetricType1 + """ + Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics + """ + event_name: str + """ + REQUIRED: PostHog event name. Use '$pageview' if unsure. + """ + funnel_steps: list[str] | None = None + """ + For funnel metrics only: Array of event names for each funnel step + """ + properties: dict[str, Any] | None = None + """ + Event properties to filter on + """ + description: str | None = None + """ + What this secondary metric measures + """ + + +class Variant(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + key: str + """ + Variant key (e.g., 'control', 'variant_a', 'new_design') + """ + name: str | None = None + """ + Human-readable variant name + """ + rollout_percentage: Annotated[float, Field(ge=0.0, le=100.0)] + """ + Percentage of users to show this variant + """ + + +class ExperimentCreateSchema(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + name: Annotated[str, Field(min_length=1)] + """ + Experiment name - should clearly describe what is being tested + """ + description: str | None = None + """ + Detailed description of the experiment hypothesis, what changes are being tested, and expected outcomes + 
""" + feature_flag_key: str + """ + Feature flag key (letters, numbers, hyphens, underscores only). IMPORTANT: First search for existing feature flags that might be suitable using the feature-flags-get-all tool, then suggest reusing existing ones or creating a new key based on the experiment name + """ + type: Type | None = Type.PRODUCT + """ + Experiment type: 'product' for backend/API changes, 'web' for frontend UI changes + """ + primary_metrics: list[PrimaryMetric] | None = None + """ + Primary metrics to measure experiment success. IMPORTANT: Each metric needs event_name to track data. For funnels, provide funnel_steps array with event names for each step. Ask user what events they track, or use project-property-definitions to find available events. + """ + secondary_metrics: list[SecondaryMetric] | None = None + """ + Secondary metrics to monitor for potential side effects or additional insights. Each metric needs event_name. + """ + variants: list[Variant] | None = None + """ + Experiment variants. If not specified, defaults to 50/50 control/test split. Ask user how many variants they need and what each tests + """ + minimum_detectable_effect: float | None = 30 + """ + Minimum detectable effect in percentage. Lower values require more users but detect smaller changes. Suggest 20-30% for most experiments + """ + filter_test_accounts: bool | None = True + """ + Whether to filter out internal test accounts + """ + target_properties: dict[str, Any] | None = None + """ + Properties to target specific user segments (e.g., country, subscription type) + """ + draft: bool | None = True + """ + Create as draft (true) or launch immediately (false). 
Recommend draft for review first + """ + holdout_id: float | None = None + """ + Holdout group ID if this experiment should exclude users from other experiments + """ + + +class ExperimentDeleteSchema(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + experimentId: float + """ + The ID of the experiment to delete + """ + + class ExperimentGetAllSchema(BaseModel): pass model_config = ConfigDict( @@ -160,6 +331,292 @@ class ExperimentGetSchema(BaseModel): """ +class ExperimentResultsGetSchema(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + experimentId: float + """ + The ID of the experiment to get comprehensive results for + """ + refresh: bool + """ + Force refresh of results instead of using cached values + """ + + +class PrimaryMetric1(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + name: str | None = None + """ + Human-readable metric name + """ + metric_type: MetricType1 + """ + Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics + """ + event_name: str + """ + PostHog event name (e.g., '$pageview', 'add_to_cart', 'purchase') + """ + funnel_steps: list[str] | None = None + """ + For funnel metrics only: Array of event names for each funnel step + """ + properties: dict[str, Any] | None = None + """ + Event properties to filter on + """ + description: str | None = None + """ + What this metric measures + """ + + +class MetricType3(StrEnum): + """ + Metric type + """ + + MEAN = "mean" + FUNNEL = "funnel" + RATIO = "ratio" + + +class SecondaryMetric1(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + name: str | None = None + """ + Human-readable metric name + """ + metric_type: MetricType3 + """ + Metric type + """ + event_name: str + """ + PostHog event name + """ + funnel_steps: list[str] | None = None + """ + For funnel metrics only: Array of event names + """ + properties: dict[str, Any] | None = None + """ + Event properties to filter on + """ + 
description: str | None = None + """ + What this metric measures + """ + + +class Conclude(StrEnum): + """ + Conclude experiment with result + """ + + WON = "won" + LOST = "lost" + INCONCLUSIVE = "inconclusive" + STOPPED_EARLY = "stopped_early" + INVALID = "invalid" + + +class ExperimentUpdateInputSchema(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + name: str | None = None + """ + Update experiment name + """ + description: str | None = None + """ + Update experiment description + """ + primary_metrics: list[PrimaryMetric1] | None = None + """ + Update primary metrics + """ + secondary_metrics: list[SecondaryMetric1] | None = None + """ + Update secondary metrics + """ + minimum_detectable_effect: float | None = None + """ + Update minimum detectable effect in percentage + """ + launch: bool | None = None + """ + Launch experiment (set start_date) or keep as draft + """ + conclude: Conclude | None = None + """ + Conclude experiment with result + """ + conclusion_comment: str | None = None + """ + Comment about experiment conclusion + """ + restart: bool | None = None + """ + Restart concluded experiment (clears end_date and conclusion) + """ + archive: bool | None = None + """ + Archive or unarchive experiment + """ + + +class MetricType4(StrEnum): + """ + Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics + """ + + MEAN = "mean" + FUNNEL = "funnel" + RATIO = "ratio" + + +class PrimaryMetric2(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + name: str | None = None + """ + Human-readable metric name + """ + metric_type: MetricType4 + """ + Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics + """ + event_name: str + """ + PostHog event name (e.g., '$pageview', 'add_to_cart', 'purchase') + """ + funnel_steps: list[str] | None = None + """ + For funnel metrics only: Array of event names for each funnel step + """ + properties: 
dict[str, Any] | None = None + """ + Event properties to filter on + """ + description: str | None = None + """ + What this metric measures + """ + + +class MetricType5(StrEnum): + """ + Metric type + """ + + MEAN = "mean" + FUNNEL = "funnel" + RATIO = "ratio" + + +class SecondaryMetric2(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + name: str | None = None + """ + Human-readable metric name + """ + metric_type: MetricType5 + """ + Metric type + """ + event_name: str + """ + PostHog event name + """ + funnel_steps: list[str] | None = None + """ + For funnel metrics only: Array of event names + """ + properties: dict[str, Any] | None = None + """ + Event properties to filter on + """ + description: str | None = None + """ + What this metric measures + """ + + +class Data4(BaseModel): + """ + The experiment data to update using user-friendly format + """ + + model_config = ConfigDict( + extra="forbid", + ) + name: str | None = None + """ + Update experiment name + """ + description: str | None = None + """ + Update experiment description + """ + primary_metrics: list[PrimaryMetric2] | None = None + """ + Update primary metrics + """ + secondary_metrics: list[SecondaryMetric2] | None = None + """ + Update secondary metrics + """ + minimum_detectable_effect: float | None = None + """ + Update minimum detectable effect in percentage + """ + launch: bool | None = None + """ + Launch experiment (set start_date) or keep as draft + """ + conclude: Conclude | None = None + """ + Conclude experiment with result + """ + conclusion_comment: str | None = None + """ + Comment about experiment conclusion + """ + restart: bool | None = None + """ + Restart concluded experiment (clears end_date and conclusion) + """ + archive: bool | None = None + """ + Archive or unarchive experiment + """ + + +class ExperimentUpdateSchema(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + experimentId: float + """ + The ID of the experiment to update + """ + data: 
Data4 + """ + The experiment data to update using user-friendly format + """ + + class Operator(StrEnum): EXACT = "exact" IS_NOT = "is_not" @@ -270,7 +727,7 @@ class Filters1(BaseModel): groups: list[Group1] -class Data4(BaseModel): +class Data5(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -286,7 +743,7 @@ class FeatureFlagUpdateSchema(BaseModel): extra="forbid", ) flagKey: str - data: Data4 + data: Data5 class Kind(StrEnum): @@ -305,7 +762,7 @@ class Query(BaseModel): """ -class Data5(BaseModel): +class Data6(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -320,7 +777,7 @@ class InsightCreateSchema(BaseModel): model_config = ConfigDict( extra="forbid", ) - data: Data5 + data: Data6 class InsightDeleteSchema(BaseModel): @@ -340,7 +797,7 @@ class InsightGenerateHogQLFromQuestionSchema(BaseModel): """ -class Data6(BaseModel): +class Data7(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -354,7 +811,7 @@ class InsightGetAllSchema(BaseModel): model_config = ConfigDict( extra="forbid", ) - data: Data6 | None = None + data: Data7 | None = None class InsightGetSchema(BaseModel): @@ -382,7 +839,7 @@ class Query1(BaseModel): """ -class Data7(BaseModel): +class Data8(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -400,7 +857,7 @@ class InsightUpdateSchema(BaseModel): extra="forbid", ) insightId: str - data: Data7 + data: Data8 class LLMAnalyticsGetCostsSchema(BaseModel): @@ -449,7 +906,7 @@ class ProjectGetAllSchema(BaseModel): ) -class Type(StrEnum): +class Type1(StrEnum): """ Type of properties to get """ @@ -462,7 +919,7 @@ class ProjectPropertyDefinitionsInputSchema(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type + type: Type1 """ Type of properties to get """ @@ -502,7 +959,7 @@ class Properties(BaseModel): type: str | None = None -class Type1(StrEnum): +class Type2(StrEnum): AND_ = "AND" OR_ = "OR" @@ -521,7 +978,7 @@ class Properties1(BaseModel): model_config = ConfigDict( extra="forbid", ) - 
type: Type1 + type: Type2 values: list[Value] @@ -529,7 +986,7 @@ class Properties2(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type1 + type: Type2 values: list[Value] @@ -573,7 +1030,7 @@ class Properties4(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type1 + type: Type2 values: list[Value] @@ -581,7 +1038,7 @@ class Properties5(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type1 + type: Type2 values: list[Value] @@ -671,7 +1128,7 @@ class Properties7(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type1 + type: Type2 values: list[Value] @@ -679,7 +1136,7 @@ class Properties8(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type1 + type: Type2 values: list[Value] @@ -697,7 +1154,7 @@ class Properties10(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type1 + type: Type2 values: list[Value] @@ -705,7 +1162,7 @@ class Properties11(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type1 + type: Type2 values: list[Value] @@ -822,7 +1279,7 @@ class Properties13(BaseModel): model_config = ConfigDict( extra="forbid", ) - type: Type1 + type: Type2 values: list[Value] @@ -859,7 +1316,7 @@ class QueryRunInputSchema(BaseModel): query: Query2 | Query3 -class Type10(StrEnum): +class Type11(StrEnum): POPOVER = "popover" API = "api" WIDGET = "widget" @@ -1238,7 +1695,7 @@ class SurveyCreateSchema(BaseModel): ) name: Annotated[str, Field(min_length=1)] description: str | None = None - type: Type10 | None = None + type: Type11 | None = None questions: Annotated[ list[Questions | Questions1 | Questions2 | Questions3 | Questions4 | Questions5], Field(min_length=1), @@ -1734,7 +2191,7 @@ class SurveyUpdateSchema(BaseModel): ) name: Annotated[str | None, Field(min_length=1)] = None description: str | None = None - type: Type10 | None = None + type: Type11 | None = None questions: Annotated[ list[Questions6 | Questions7 | Questions8 | Questions9 | Questions10 | 
Questions11] | None, Field(min_length=1), diff --git a/schema/tool-definitions.json b/schema/tool-definitions.json index a55ef73..1058c4d 100644 --- a/schema/tool-definitions.json +++ b/schema/tool-definitions.json @@ -209,6 +209,48 @@ "readOnlyHint": true } }, + "experiment-create": { + "description": "Create a comprehensive A/B test experiment. PROCESS: 1) Understand experiment goal and hypothesis 2) Search existing feature flags with 'feature-flags-get-all' tool first and suggest reuse or new key 3) Help user define success metrics by asking what they want to optimize 4) MOST IMPORTANT: Use 'event-definitions-list' tool to find available events in their project 5) For funnel metrics, ask for specific event sequence (e.g., ['product_view', 'add_to_cart', 'purchase']) and use funnel_steps parameter 6) Configure variants (default 50/50 control/test unless they specify otherwise) 7) Set targeting criteria if needed.", + "category": "Experiments", + "feature": "experiments", + "summary": "Create A/B test experiment with guided metric and feature flag setup", + "title": "Create experiment", + "required_scopes": ["experiment:write"], + "annotations": { + "destructiveHint": false, + "idempotentHint": false, + "openWorldHint": true, + "readOnlyHint": false + } + }, + "experiment-delete": { + "description": "Delete an experiment by ID.", + "category": "Experiments", + "feature": "experiments", + "summary": "Delete an experiment by ID.", + "title": "Delete experiment", + "required_scopes": ["experiment:write"], + "annotations": { + "destructiveHint": true, + "idempotentHint": true, + "openWorldHint": true, + "readOnlyHint": false + } + }, + "experiment-update": { + "description": "Update an existing experiment by ID. Can update name, description, lifecycle state, variants, metrics, and other properties. RESTART WORKFLOW: To restart a concluded experiment, set end_date=null, conclusion=null, conclusion_comment=null, and optionally set a new start_date. 
To make it draft again, also set start_date=null. COMMON PATTERNS: Launch draft (set start_date), stop running (set end_date + conclusion), archive (set archived=true), modify variants (update parameters.feature_flag_variants). NOTE: feature_flag_key cannot be changed after creation.", + "category": "Experiments", + "feature": "experiments", + "summary": "Update an existing experiment with lifecycle management and restart capability.", + "title": "Update experiment", + "required_scopes": ["experiment:write"], + "annotations": { + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true, + "readOnlyHint": false + } + }, "experiment-get": { "description": "Get details of a specific experiment by ID.", "category": "Experiments", @@ -223,6 +265,20 @@ "readOnlyHint": true } }, + "experiment-results-get": { + "description": "Get comprehensive experiment results including all metrics data (primary and secondary) and exposure data. This tool fetches the experiment details and executes the necessary queries to get complete experiment results. Only works with new experiments (not legacy experiments).", + "category": "Experiments", + "feature": "experiments", + "summary": "Get comprehensive experiment results including metrics and exposure data.", + "title": "Get experiment results", + "required_scopes": ["experiment:read"], + "annotations": { + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true, + "readOnlyHint": true + } + }, "insight-create-from-query": { "description": "Create an insight from a query that you have previously tested with 'query-run'. You should check the query runs, before creating an insight. Do not create an insight before running the query, unless you know already that it is correct (e.g. 
you are making a minor modification to an existing query you have seen).", "category": "Insights & analytics", diff --git a/schema/tool-inputs.json b/schema/tool-inputs.json index ebfe5cc..4d87b65 100644 --- a/schema/tool-inputs.json +++ b/schema/tool-inputs.json @@ -221,6 +221,197 @@ }, "additionalProperties": false }, + "ExperimentCreateSchema": { + "type": "object", + "properties": { + "name": { + "type": "string", + "minLength": 1, + "description": "Experiment name - should clearly describe what is being tested" + }, + "description": { + "type": "string", + "description": "Detailed description of the experiment hypothesis, what changes are being tested, and expected outcomes" + }, + "feature_flag_key": { + "type": "string", + "description": "Feature flag key (letters, numbers, hyphens, underscores only). IMPORTANT: First search for existing feature flags that might be suitable using the feature-flags-get-all tool, then suggest reusing existing ones or creating a new key based on the experiment name" + }, + "type": { + "type": "string", + "enum": [ + "product", + "web" + ], + "default": "product", + "description": "Experiment type: 'product' for backend/API changes, 'web' for frontend UI changes" + }, + "primary_metrics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Human-readable metric name" + }, + "metric_type": { + "type": "string", + "enum": [ + "mean", + "funnel", + "ratio" + ], + "description": "Metric type: 'mean' for average values (revenue, time spent), 'funnel' for conversion flows, 'ratio' for comparing two metrics" + }, + "event_name": { + "type": "string", + "description": "REQUIRED for metrics to work: PostHog event name (e.g., '$pageview', 'add_to_cart', 'purchase'). For funnels, this is the first step. Use '$pageview' if unsure. Search project-property-definitions tool for available events." 
+ }, + "funnel_steps": { + "type": "array", + "items": { + "type": "string" + }, + "description": "For funnel metrics only: Array of event names for each funnel step (e.g., ['product_view', 'add_to_cart', 'checkout', 'purchase'])" + }, + "properties": { + "type": "object", + "additionalProperties": {}, + "description": "Event properties to filter on" + }, + "description": { + "type": "string", + "description": "What this metric measures and why it's important for the experiment" + } + }, + "required": [ + "metric_type", + "event_name" + ], + "additionalProperties": false + }, + "description": "Primary metrics to measure experiment success. IMPORTANT: Each metric needs event_name to track data. For funnels, provide funnel_steps array with event names for each step. Ask user what events they track, or use project-property-definitions to find available events." + }, + "secondary_metrics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Human-readable metric name" + }, + "metric_type": { + "type": "string", + "enum": [ + "mean", + "funnel", + "ratio" + ], + "description": "Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics" + }, + "event_name": { + "type": "string", + "description": "REQUIRED: PostHog event name. Use '$pageview' if unsure." + }, + "funnel_steps": { + "type": "array", + "items": { + "type": "string" + }, + "description": "For funnel metrics only: Array of event names for each funnel step" + }, + "properties": { + "type": "object", + "additionalProperties": {}, + "description": "Event properties to filter on" + }, + "description": { + "type": "string", + "description": "What this secondary metric measures" + } + }, + "required": [ + "metric_type", + "event_name" + ], + "additionalProperties": false + }, + "description": "Secondary metrics to monitor for potential side effects or additional insights. 
Each metric needs event_name." + }, + "variants": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "Variant key (e.g., 'control', 'variant_a', 'new_design')" + }, + "name": { + "type": "string", + "description": "Human-readable variant name" + }, + "rollout_percentage": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "Percentage of users to show this variant" + } + }, + "required": [ + "key", + "rollout_percentage" + ], + "additionalProperties": false + }, + "description": "Experiment variants. If not specified, defaults to 50/50 control/test split. Ask user how many variants they need and what each tests" + }, + "minimum_detectable_effect": { + "type": "number", + "default": 30, + "description": "Minimum detectable effect in percentage. Lower values require more users but detect smaller changes. Suggest 20-30% for most experiments" + }, + "filter_test_accounts": { + "type": "boolean", + "default": true, + "description": "Whether to filter out internal test accounts" + }, + "target_properties": { + "type": "object", + "additionalProperties": {}, + "description": "Properties to target specific user segments (e.g., country, subscription type)" + }, + "draft": { + "type": "boolean", + "default": true, + "description": "Create as draft (true) or launch immediately (false). 
Recommend draft for review first" + }, + "holdout_id": { + "type": "number", + "description": "Holdout group ID if this experiment should exclude users from other experiments" + } + }, + "required": [ + "name", + "feature_flag_key" + ], + "additionalProperties": false + }, + "ExperimentDeleteSchema": { + "type": "object", + "properties": { + "experimentId": { + "type": "number", + "description": "The ID of the experiment to delete" + } + }, + "required": [ + "experimentId" + ], + "additionalProperties": false + }, "ExperimentGetAllSchema": { "type": "object", "properties": {}, @@ -239,6 +430,317 @@ ], "additionalProperties": false }, + "ExperimentResultsGetSchema": { + "type": "object", + "properties": { + "experimentId": { + "type": "number", + "description": "The ID of the experiment to get comprehensive results for" + }, + "refresh": { + "type": "boolean", + "description": "Force refresh of results instead of using cached values" + } + }, + "required": [ + "experimentId", + "refresh" + ], + "additionalProperties": false + }, + "ExperimentUpdateInputSchema": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Update experiment name" + }, + "description": { + "type": "string", + "description": "Update experiment description" + }, + "primary_metrics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Human-readable metric name" + }, + "metric_type": { + "type": "string", + "enum": [ + "mean", + "funnel", + "ratio" + ], + "description": "Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics" + }, + "event_name": { + "type": "string", + "description": "PostHog event name (e.g., '$pageview', 'add_to_cart', 'purchase')" + }, + "funnel_steps": { + "type": "array", + "items": { + "type": "string" + }, + "description": "For funnel metrics only: Array of event names for each funnel step" + }, + "properties": { + 
"type": "object", + "additionalProperties": {}, + "description": "Event properties to filter on" + }, + "description": { + "type": "string", + "description": "What this metric measures" + } + }, + "required": [ + "metric_type", + "event_name" + ], + "additionalProperties": false + }, + "description": "Update primary metrics" + }, + "secondary_metrics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Human-readable metric name" + }, + "metric_type": { + "type": "string", + "enum": [ + "mean", + "funnel", + "ratio" + ], + "description": "Metric type" + }, + "event_name": { + "type": "string", + "description": "PostHog event name" + }, + "funnel_steps": { + "type": "array", + "items": { + "type": "string" + }, + "description": "For funnel metrics only: Array of event names" + }, + "properties": { + "type": "object", + "additionalProperties": {}, + "description": "Event properties to filter on" + }, + "description": { + "type": "string", + "description": "What this metric measures" + } + }, + "required": [ + "metric_type", + "event_name" + ], + "additionalProperties": false + }, + "description": "Update secondary metrics" + }, + "minimum_detectable_effect": { + "type": "number", + "description": "Update minimum detectable effect in percentage" + }, + "launch": { + "type": "boolean", + "description": "Launch experiment (set start_date) or keep as draft" + }, + "conclude": { + "type": "string", + "enum": [ + "won", + "lost", + "inconclusive", + "stopped_early", + "invalid" + ], + "description": "Conclude experiment with result" + }, + "conclusion_comment": { + "type": "string", + "description": "Comment about experiment conclusion" + }, + "restart": { + "type": "boolean", + "description": "Restart concluded experiment (clears end_date and conclusion)" + }, + "archive": { + "type": "boolean", + "description": "Archive or unarchive experiment" + } + }, + "additionalProperties": false + }, + 
"ExperimentUpdateSchema": { + "type": "object", + "properties": { + "experimentId": { + "type": "number", + "description": "The ID of the experiment to update" + }, + "data": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Update experiment name" + }, + "description": { + "type": "string", + "description": "Update experiment description" + }, + "primary_metrics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Human-readable metric name" + }, + "metric_type": { + "type": "string", + "enum": [ + "mean", + "funnel", + "ratio" + ], + "description": "Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics" + }, + "event_name": { + "type": "string", + "description": "PostHog event name (e.g., '$pageview', 'add_to_cart', 'purchase')" + }, + "funnel_steps": { + "type": "array", + "items": { + "type": "string" + }, + "description": "For funnel metrics only: Array of event names for each funnel step" + }, + "properties": { + "type": "object", + "additionalProperties": {}, + "description": "Event properties to filter on" + }, + "description": { + "type": "string", + "description": "What this metric measures" + } + }, + "required": [ + "metric_type", + "event_name" + ], + "additionalProperties": false + }, + "description": "Update primary metrics" + }, + "secondary_metrics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Human-readable metric name" + }, + "metric_type": { + "type": "string", + "enum": [ + "mean", + "funnel", + "ratio" + ], + "description": "Metric type" + }, + "event_name": { + "type": "string", + "description": "PostHog event name" + }, + "funnel_steps": { + "type": "array", + "items": { + "type": "string" + }, + "description": "For funnel metrics only: Array of event names" + }, + "properties": { + "type": "object", + 
"additionalProperties": {}, + "description": "Event properties to filter on" + }, + "description": { + "type": "string", + "description": "What this metric measures" + } + }, + "required": [ + "metric_type", + "event_name" + ], + "additionalProperties": false + }, + "description": "Update secondary metrics" + }, + "minimum_detectable_effect": { + "type": "number", + "description": "Update minimum detectable effect in percentage" + }, + "launch": { + "type": "boolean", + "description": "Launch experiment (set start_date) or keep as draft" + }, + "conclude": { + "type": "string", + "enum": [ + "won", + "lost", + "inconclusive", + "stopped_early", + "invalid" + ], + "description": "Conclude experiment with result" + }, + "conclusion_comment": { + "type": "string", + "description": "Comment about experiment conclusion" + }, + "restart": { + "type": "boolean", + "description": "Restart concluded experiment (clears end_date and conclusion)" + }, + "archive": { + "type": "boolean", + "description": "Archive or unarchive experiment" + } + }, + "additionalProperties": false, + "description": "The experiment data to update using user-friendly format" + } + }, + "required": [ + "experimentId", + "data" + ], + "additionalProperties": false + }, "FeatureFlagCreateSchema": { "type": "object", "properties": { diff --git a/typescript/package.json b/typescript/package.json index 35b9242..1673434 100644 --- a/typescript/package.json +++ b/typescript/package.json @@ -53,6 +53,7 @@ "agents": "^0.0.113", "ai": "^5.0.18", "posthog-node": "^4.18.0", + "uuid": "^11.1.0", "zod": "^3.24.4" }, "devDependencies": { @@ -66,7 +67,6 @@ "tsx": "^4.20.5", "typed-openapi": "^2.2.2", "typescript": "^5.8.3", - "uuid": "^11.1.0", "vite": "^5.0.0", "vite-tsconfig-paths": "^5.1.4", "vitest": "^3.2.4", diff --git a/typescript/pnpm-lock.yaml b/typescript/pnpm-lock.yaml index d0466df..4f01998 100644 --- a/typescript/pnpm-lock.yaml +++ b/typescript/pnpm-lock.yaml @@ -20,6 +20,9 @@ importers: posthog-node: 
specifier: ^4.18.0 version: 4.18.0 + uuid: + specifier: ^11.1.0 + version: 11.1.0 zod: specifier: ^3.24.4 version: 3.25.76 @@ -54,9 +57,6 @@ importers: typescript: specifier: ^5.8.3 version: 5.9.2 - uuid: - specifier: ^11.1.0 - version: 11.1.0 vite: specifier: ^5.0.0 version: 5.4.19(@types/node@22.17.2) diff --git a/typescript/src/api/client.ts b/typescript/src/api/client.ts index 17a8e0e..218fcf5 100644 --- a/typescript/src/api/client.ts +++ b/typescript/src/api/client.ts @@ -19,11 +19,23 @@ import { type SimpleDashboard, SimpleDashboardSchema, } from "@/schema/dashboards"; -import type { Experiment } from "@/schema/experiments"; -import { ExperimentSchema } from "@/schema/experiments"; +import type { + Experiment, + ExperimentExposureQuery, + ExperimentExposureQueryResponse, + ExperimentUpdateApiPayload, +} from "@/schema/experiments"; +import { + ExperimentCreatePayloadSchema, + ExperimentExposureQueryResponseSchema, + ExperimentExposureQuerySchema, + ExperimentSchema, + ExperimentUpdateApiPayloadSchema, +} from "@/schema/experiments"; import { type CreateFeatureFlagInput, CreateFeatureFlagInputSchema, + type FeatureFlag, FeatureFlagSchema, type UpdateFeatureFlagInput, UpdateFeatureFlagInputSchema, @@ -38,7 +50,7 @@ import { } from "@/schema/insights"; import { type Organization, OrganizationSchema } from "@/schema/orgs"; import { type Project, ProjectSchema } from "@/schema/projects"; -import { PropertyDefinitionSchema } from "@/schema/properties"; +import type { ExperimentCreateSchema } from "@/schema/tool-inputs"; import { isShortId } from "@/tools/insights/utils"; import { z } from "zod"; import type { @@ -316,6 +328,291 @@ export class ApiClient { ExperimentSchema, ); }, + + getExposures: async ({ + experimentId, + refresh = false, + }: { + experimentId: number; + refresh: boolean; + }): Promise< + Result<{ + exposures: ExperimentExposureQueryResponse; + }> + > => { + /** + * we have to get the experiment details first. 
There's no guarantee + * that the user has queried for the experiment details before. + */ + const experimentDetails = await this.experiments({ projectId }).get({ + experimentId, + }); + if (!experimentDetails.success) return experimentDetails; + + const experiment = experimentDetails.data; + + /** + * Validate that the experiment has started + */ + if (!experiment.start_date) { + return { + success: false, + error: new Error( + `Experiment "${experiment.name}" has not started yet. Exposure data is only available for started experiments.`, + ), + }; + } + + /** + * create the exposure query + */ + const exposureQuery: ExperimentExposureQuery = { + kind: "ExperimentExposureQuery", + experiment_id: experimentId, + experiment_name: experiment.name, + exposure_criteria: experiment.exposure_criteria, + feature_flag: experiment.feature_flag as FeatureFlag, + start_date: experiment.start_date, + end_date: experiment.end_date, + holdout: experiment.holdout, + }; + + // Validate against existing ExperimentExposureQuerySchema + const validated = ExperimentExposureQuerySchema.parse(exposureQuery); + + // The API expects a QueryRequest object with the query wrapped + const queryRequest: any = { + query: validated, + ...(refresh ? { refresh: "blocking" } : {}), + }; + + const result = await this.fetchWithSchema( + `${this.baseUrl}/api/environments/${projectId}/query/`, + ExperimentExposureQueryResponseSchema, + { + method: "POST", + body: JSON.stringify(queryRequest), + }, + ); + + if (!result.success) { + return result; + } + + return { + success: true, + data: { + exposures: result.data, + }, + }; + }, + + getMetricResults: async ({ + experimentId, + refresh = false, + }: { + experimentId: number; + refresh?: boolean; + }): Promise< + Result<{ + experiment: Experiment; + primaryMetricsResults: any[]; + secondaryMetricsResults: any[]; + exposures: ExperimentExposureQueryResponse; + }> + > => { + /** + * we have to get the experiment details first. 
There's no guarantee
+ * that the user has queried for the experiment details before.
+ */
+ const experimentDetails = await this.experiments({ projectId }).get({
+ experimentId,
+ });
+
+ if (!experimentDetails.success) return experimentDetails;
+
+ const experiment = experimentDetails.data;
+
+ /**
+ * Validate that the experiment has started
+ */
+ if (!experiment.start_date) {
+ return {
+ success: false,
+ error: new Error(
+ `Experiment "${experiment.name}" has not started yet. Results are only available for started experiments.`,
+ ),
+ };
+ }
+
+ /**
+ * let's get the experiment exposure details to get the full
+ * picture of the results.
+ */
+ const experimentExposure = await this.experiments({ projectId }).getExposures({
+ experimentId,
+ refresh,
+ });
+ if (!experimentExposure.success) return experimentExposure;
+
+ const { exposures } = experimentExposure.data;
+
+ // Prepare metrics queries
+ const sharedPrimaryMetrics = (experiment.saved_metrics || [])
+ .filter(({ metadata }) => metadata.type === "primary")
+ .map(({ query }) => query);
+ const allPrimaryMetrics = [...(experiment.metrics || []), ...sharedPrimaryMetrics];
+
+ const sharedSecondaryMetrics = (experiment.saved_metrics || [])
+ .filter(({ metadata }) => metadata.type === "secondary")
+ .map(({ query }) => query);
+ const allSecondaryMetrics = [
+ ...(experiment.metrics_secondary || []),
+ ...sharedSecondaryMetrics,
+ ];
+
+ // Execute queries for primary metrics
+ const primaryResults = await Promise.all(
+ allPrimaryMetrics.map(async (metric) => {
+ try {
+ const queryBody = {
+ kind: "ExperimentQuery",
+ metric,
+ experiment_id: experimentId,
+ };
+
+ const queryRequest = {
+ query: queryBody,
+ ...(refresh ? { refresh: "blocking" } : {}),
+ };
+
+ const result = await this.fetchWithSchema(
+ `${this.baseUrl}/api/environments/${projectId}/query/`,
+ z.any(),
+ {
+ method: "POST",
+ body: JSON.stringify(queryRequest),
+ },
+ );
+
+ return result.success ? 
result.data : null; + } catch (error) { + return null; + } + }), + ); + + // Execute queries for secondary metrics + const secondaryResults = await Promise.all( + allSecondaryMetrics.map(async (metric) => { + try { + const queryBody = { + kind: "ExperimentQuery", + metric, + experiment_id: experimentId, + }; + + const queryRequest = { + query: queryBody, + ...(refresh ? { refresh: "blocking" } : {}), + }; + + const result = await this.fetchWithSchema( + `${this.baseUrl}/api/environments/${projectId}/query/`, + z.any(), + { + method: "POST", + body: JSON.stringify(queryRequest), + }, + ); + + return result.success ? result.data : null; + } catch (error) { + return null; + } + }), + ); + + return { + success: true, + data: { + experiment, + primaryMetricsResults: primaryResults, + secondaryMetricsResults: secondaryResults, + exposures, + }, + }; + }, + + create: async ( + experimentData: z.infer, + ): Promise> => { + // Transform agent input to API payload + const createBody = ExperimentCreatePayloadSchema.parse(experimentData); + + return this.fetchWithSchema( + `${this.baseUrl}/api/projects/${projectId}/experiments/`, + ExperimentSchema, + { + method: "POST", + body: JSON.stringify(createBody), + }, + ); + }, + + update: async ({ + experimentId, + updateData, + }: { + experimentId: number; + updateData: ExperimentUpdateApiPayload; + }): Promise> => { + try { + const updateBody = ExperimentUpdateApiPayloadSchema.parse(updateData); + + return this.fetchWithSchema( + `${this.baseUrl}/api/projects/${projectId}/experiments/${experimentId}/`, + ExperimentSchema, + { + method: "PATCH", + body: JSON.stringify(updateBody), + }, + ); + } catch (error) { + return { success: false, error: new Error(`Update failed: ${error}`) }; + } + }, + + delete: async ({ + experimentId, + }: { experimentId: number }): Promise< + Result<{ success: boolean; message: string }> + > => { + try { + const deleteResponse = await fetch( + 
`${this.baseUrl}/api/projects/${projectId}/experiments/${experimentId}/`, + { + method: "PATCH", + headers: this.buildHeaders(), + body: JSON.stringify({ deleted: true }), + }, + ); + + if (deleteResponse.ok) { + return { + success: true, + data: { success: true, message: "Experiment deleted successfully" }, + }; + } + + return { + success: false, + error: new Error(`Delete failed with status: ${deleteResponse.status}`), + }; + } catch (error) { + return { success: false, error: new Error(`Delete failed: ${error}`) }; + } + }, }; } diff --git a/typescript/src/schema/experiments.ts b/typescript/src/schema/experiments.ts index 166b850..a405579 100644 --- a/typescript/src/schema/experiments.ts +++ b/typescript/src/schema/experiments.ts @@ -1,31 +1,560 @@ +import { v4 as uuidv4 } from "uuid"; import { z } from "zod"; +import { FeatureFlagSchema } from "./flags"; +import { + ExperimentCreateSchema as ToolExperimentCreateSchema, + ExperimentUpdateInputSchema as ToolExperimentUpdateInputSchema, +} from "./tool-inputs"; +const ExperimentType = ["web", "product"] as const; + +const ExperimentConclusion = ["won", "lost", "inconclusive", "stopped_early", "invalid"] as const; + +/** + * This is the schema for the experiment metric base properties. + * It references the ExperimentMetricBaseProperties type from + * @posthog/frontend/src/queries/schema/schema-general.ts + * + * TODO: Add the schemas for FunnelConversionWindowTimeUnit + */ +export const ExperimentMetricBasePropertiesSchema = z.object({ + kind: z.literal("ExperimentMetric"), + uuid: z.string().optional(), + name: z.string().optional(), + conversion_window: z.number().optional(), + conversion_window_unit: z.any().optional(), // FunnelConversionWindowTimeUnit +}); + +export type ExperimentMetricBaseProperties = z.infer; + +/** + * This is the schema for the experiment metric outlier handling. 
+ * It references the ExperimentMetricOutlierHandling type from + * @posthog/frontend/src/queries/schema/schema-general.ts + */ +export const ExperimentMetricOutlierHandlingSchema = z.object({ + lower_bound_percentile: z.number().optional(), + upper_bound_percentile: z.number().optional(), +}); + +export type ExperimentMetricOutlierHandling = z.infer; + +/** + * This is the schema for the experiment metric source. + * It references the ExperimentMetricSource type from + * @posthog/frontend/src/queries/schema/schema-general.ts + * + * TODO: Add the schemas for the EventsNode and ActionsNode and ExperimentDataWarehouseNode + */ +export const ExperimentMetricSourceSchema = z.any(); // EventsNode | ActionsNode | ExperimentDataWarehouseNode + +/** + * This is the schema for the experiment funnel metric step. + * It references the ExperimentFunnelMetricStep type from + * @posthog/frontend/src/queries/schema/schema-general.ts + * + * TODO: Add the schemas for the EventsNode and ActionsNode + */ +export const ExperimentFunnelMetricStepSchema = z.any(); // EventsNode | ActionsNode + +/** + * This is the schema for the experiment mean metric. + * It references the ExperimentMeanMetric type from + * @posthog/frontend/src/queries/schema/schema-general.ts + */ +export const ExperimentMeanMetricSchema = z + .object({ + metric_type: z.literal("mean"), + source: ExperimentMetricSourceSchema, + }) + .merge(ExperimentMetricBasePropertiesSchema) + .merge(ExperimentMetricOutlierHandlingSchema); + +export type ExperimentMeanMetric = z.infer; + +/** + * This is the schema for the experiment funnel metric. 
+ * It references the ExperimentFunnelMetric type from + * @posthog/frontend/src/queries/schema/schema-general.ts + */ +export const ExperimentFunnelMetricSchema = z + .object({ + metric_type: z.literal("funnel"), + series: z.array(ExperimentFunnelMetricStepSchema), + funnel_order_type: z.any().optional(), // StepOrderValue + }) + .merge(ExperimentMetricBasePropertiesSchema); + +export type ExperimentFunnelMetric = z.infer; + +/** + * This is the schema for the experiment ratio metric. + * It references the ExperimentRatioMetric type from + * @posthog/frontend/src/queries/schema/schema-general.ts + */ +export const ExperimentRatioMetricSchema = z + .object({ + metric_type: z.literal("ratio"), + numerator: ExperimentMetricSourceSchema, + denominator: ExperimentMetricSourceSchema, + }) + .merge(ExperimentMetricBasePropertiesSchema); + +export type ExperimentRatioMetric = z.infer; + +/** + * This is the schema for the experiment metric. + * It references the ExperimentMetric type from + * @posthog/frontend/src/queries/schema/schema-general.ts + */ +export const ExperimentMetricSchema = z.union([ + ExperimentMeanMetricSchema, + ExperimentFunnelMetricSchema, + ExperimentRatioMetricSchema, +]); + +export type ExperimentMetric = z.infer; + +/** + * This is the schema for the experiment exposure config. + * It references the ExperimentEventExposureConfig type from + * @posthog/frontend/src/queries/schema/schema-general.ts + */ +export const ExperimentEventExposureConfigSchema = z.object({ + kind: z.literal("ExperimentEventExposureConfig"), + event: z.string(), + properties: z.array(z.any()), // this is an array of AnyPropertyFilter +}); + +/** + * This is the schema for the experiment exposure criteria. 
+ * It references the ExperimentExposureCriteria type from + * @posthog/frontend/src/queries/schema/schema-general.ts + */ +export const ExperimentExposureCriteriaSchema = z.object({ + filterTestAccounts: z.boolean().optional(), + exposure_config: ExperimentEventExposureConfigSchema.optional(), + multiple_variant_handling: z.enum(["exclude", "first_seen"]).optional(), +}); + +/** + * This is the schema for the experiment object. + * It references the Experiment type from + * @posthog/frontend/src/types.ts + */ export const ExperimentSchema = z.object({ id: z.number(), name: z.string(), + type: z.enum(ExperimentType).nullish(), description: z.string().nullish(), feature_flag_key: z.string(), - start_date: z.string().nullish(), - end_date: z.string().nullish(), - created_at: z.string(), - updated_at: z.string(), - archived: z.boolean(), + feature_flag: FeatureFlagSchema.nullish(), + exposure_cohort: z.number().nullish(), + exposure_criteria: ExperimentExposureCriteriaSchema.nullish(), + /** + * We only type ExperimentMetrics. Legacy metric formats are not validated. 
+ */ + metrics: z.array(z.union([ExperimentMetricSchema, z.any()])).nullish(), + metrics_secondary: z.array(z.union([ExperimentMetricSchema, z.any()])).nullish(), + saved_metrics: z.array(z.any()).nullish(), + saved_metrics_ids: z.array(z.any()).nullable(), parameters: z .object({ - feature_flag_variants: z.array( - z.object({ - key: z.string(), - name: z.string().nullish(), - rollout_percentage: z.number().nullish(), - }), - ), + feature_flag_variants: z + .array( + z.object({ + key: z.string(), + name: z.string().nullish(), + rollout_percentage: z.number().nullish(), + }), + ) + .nullish(), minimum_detectable_effect: z.number().nullish(), recommended_running_time: z.number().nullish(), recommended_sample_size: z.number().nullish(), }) .nullish(), - metrics: z.array(z.any()).nullish(), - secondary_metrics: z.array(z.any()).nullish(), + start_date: z.string().nullish(), + end_date: z.string().nullish(), + archived: z.boolean(), + deleted: z.boolean(), + created_at: z.string(), + updated_at: z.string(), + holdout: z.any().nullish(), + holdout_id: z.number().nullish(), + stats_config: z.any().optional(), + conclusion: z.enum(ExperimentConclusion).nullish(), + conclusion_comment: z.string().nullish(), }); export type Experiment = z.infer; + +/** + * Schema for the API payload when creating an experiment + * This is derived from ExperimentSchema with appropriate omissions + */ +export const ExperimentApiPayloadSchema = ExperimentSchema.omit({ + id: true, + feature_flag: true, + exposure_cohort: true, + exposure_criteria: true, + saved_metrics: true, + saved_metrics_ids: true, + start_date: true, + end_date: true, + deleted: true, + archived: true, + created_at: true, + updated_at: true, + holdout: true, + stats_config: true, + conclusion: true, + conclusion_comment: true, +}).partial(); + +export type ExperimentApiPayload = z.infer; + +/** + * Schema for the API payload when updating an experiment + * Derived from ExperimentSchema, omitting fields that cannot be 
updated + */ +export const ExperimentUpdateApiPayloadSchema = ExperimentSchema.omit({ + id: true, + feature_flag: true, + feature_flag_key: true, + type: true, + exposure_cohort: true, + saved_metrics: true, + deleted: true, + created_at: true, + updated_at: true, + holdout: true, + holdout_id: true, +}).partial(); + +export type ExperimentUpdateApiPayload = z.infer; + +/** + * Transform tool input metrics to ExperimentMetric format for API + */ +const transformMetricToApi = (metric: any): z.infer => { + const uuid = uuidv4(); + const base = { + kind: "ExperimentMetric" as const, + uuid, + name: metric.name, + }; + + switch (metric.metric_type) { + case "mean": + return { + ...base, + metric_type: "mean", + source: { + kind: "EventsNode", + event: metric.event_name, + properties: metric.properties || {}, + }, + }; + + case "funnel": + return { + ...base, + metric_type: "funnel", + series: (metric.funnel_steps || [metric.event_name]).map((event: string) => ({ + kind: "EventsNode", + event, + properties: metric.properties || {}, + })), + }; + + case "ratio": + return { + ...base, + metric_type: "ratio", + numerator: { + kind: "EventsNode", + event: metric.event_name, + properties: metric.properties?.numerator || metric.properties || {}, + }, + denominator: { + kind: "EventsNode", + event: metric.properties?.denominator_event || metric.event_name, + properties: metric.properties?.denominator || metric.properties || {}, + }, + }; + + default: + throw new Error(`Unknown metric type: ${metric.metric_type}`); + } +}; + +/** + * Transform tool input to API payload format + * This bridges the gap between user-friendly input and PostHog API requirements + */ +export const ExperimentCreatePayloadSchema = ToolExperimentCreateSchema.transform((input) => { + // Transform metrics with proper UUIDs + const primaryMetrics = input.primary_metrics?.map(transformMetricToApi) || []; + const secondaryMetrics = input.secondary_metrics?.map(transformMetricToApi) || []; + + return { + // 
Core fields + name: input.name, + description: input.description || null, + feature_flag_key: input.feature_flag_key, // Maps to get_feature_flag_key in serializer + type: input.type || "product", + + // Metrics - ensure arrays are never null, always empty arrays when no metrics + metrics: primaryMetrics, + metrics_secondary: secondaryMetrics, + + // Metrics UUIDs for ordering - ensure arrays are never null + primary_metrics_ordered_uuids: primaryMetrics.map((m) => m.uuid), + secondary_metrics_ordered_uuids: secondaryMetrics.map((m) => m.uuid), + + // Legacy fields still required by API + filters: {}, // Legacy but still in model + secondary_metrics: secondaryMetrics, // Use the same array as metrics_secondary + saved_metrics_ids: [], // Empty array for saved metrics + + // Parameters with variants + parameters: { + feature_flag_variants: input.variants || [ + { key: "control", name: "Control", rollout_percentage: 50 }, + { key: "test", name: "Test", rollout_percentage: 50 }, + ], + minimum_detectable_effect: input.minimum_detectable_effect || 30, + }, + + // Exposure criteria + exposure_criteria: input.filter_test_accounts + ? { + filterTestAccounts: input.filter_test_accounts, + } + : null, + + // Stats config (empty, will be filled by backend) + stats_config: {}, + + // State fields + start_date: input.draft === false ? 
new Date().toISOString() : null, + end_date: null, + archived: false, + deleted: false, + + // Optional holdout + holdout_id: input.holdout_id || null, + }; +}).pipe(ExperimentApiPayloadSchema); + +export type ExperimentCreatePayload = z.output; + +/** + * Transform user-friendly update input to API payload format for experiment updates + * This handles partial updates with the same transformation patterns as creation + */ +export const ExperimentUpdateTransformSchema = ToolExperimentUpdateInputSchema.transform( + (input) => { + const updatePayload: Record = {}; + + // Basic fields - direct mapping + if (input.name !== undefined) { + updatePayload.name = input.name; + } + if (input.description !== undefined) { + updatePayload.description = input.description; + } + + // Transform metrics if provided + if (input.primary_metrics !== undefined) { + updatePayload.metrics = input.primary_metrics.map(transformMetricToApi); + updatePayload.primary_metrics_ordered_uuids = updatePayload.metrics.map( + (m: any) => m.uuid!, + ); + } + + if (input.secondary_metrics !== undefined) { + updatePayload.metrics_secondary = input.secondary_metrics.map(transformMetricToApi); + updatePayload.secondary_metrics_ordered_uuids = updatePayload.metrics_secondary.map( + (m: any) => m.uuid!, + ); + } + + // Transform minimum detectable effect into parameters + if (input.minimum_detectable_effect !== undefined) { + updatePayload.parameters = { + ...updatePayload.parameters, + minimum_detectable_effect: input.minimum_detectable_effect, + }; + } + + // Handle experiment state management + if (input.launch === true) { + updatePayload.start_date = new Date().toISOString(); + } + + if (input.conclude !== undefined) { + updatePayload.conclusion = input.conclude; + updatePayload.end_date = new Date().toISOString(); + if (input.conclusion_comment !== undefined) { + updatePayload.conclusion_comment = input.conclusion_comment; + } + } + + if (input.restart === true) { + updatePayload.end_date = null; + 
updatePayload.conclusion = null; + updatePayload.conclusion_comment = null; + } + + if (input.archive !== undefined) { + updatePayload.archived = input.archive; + } + + return updatePayload; + }, +).pipe(ExperimentUpdateApiPayloadSchema); + +export type ExperimentUpdateTransform = z.output; + +/** + * This is the schema for the experiment exposure query. + * It references the ExperimentExposureQuery type from + * @posthog/frontend/src/queries/schema/schema-general.ts + */ +export const ExperimentExposureQuerySchema = z.object({ + kind: z.literal("ExperimentExposureQuery"), + experiment_id: z.number(), + experiment_name: z.string(), + exposure_criteria: ExperimentExposureCriteriaSchema.nullish(), + feature_flag: FeatureFlagSchema.optional(), + start_date: z.string().nullish(), + end_date: z.string().nullish(), + holdout: z.any().optional(), +}); + +export type ExperimentExposureQuery = z.infer; + +export const ExperimentExposureTimeSeriesSchema = z.object({ + variant: z.string(), + days: z.array(z.string()), + exposure_counts: z.array(z.number()), +}); + +export const ExperimentExposureQueryResponseSchema = z.object({ + kind: z.literal("ExperimentExposureQuery"), // API returns the query kind, not a response kind + timeseries: z.array(ExperimentExposureTimeSeriesSchema), + total_exposures: z.record(z.string(), z.number()), + date_range: z.object({ + date_from: z.string(), + date_to: z.string().nullable(), // API can return null for date_to + }), +}); + +export type ExperimentExposureQueryResponse = z.infer; + +export const ExperimentResultsResponseSchema = z + .object({ + experiment: ExperimentSchema.pick({ + id: true, + name: true, + description: true, + feature_flag_key: true, + start_date: true, + end_date: true, + metrics: true, + metrics_secondary: true, + parameters: true, // Pick parameters to extract variants + }).transform((data) => ({ + id: data.id, + name: data.name, + description: data.description, + feature_flag_key: data.feature_flag_key, + metrics: 
data.metrics, + metrics_secondary: data.metrics_secondary, + start_date: data.start_date, + end_date: data.end_date, + status: data.start_date ? (data.end_date ? "completed" : "running") : "draft", + variants: data.parameters?.feature_flag_variants || [], + })), + exposures: ExperimentExposureQueryResponseSchema, + primaryMetricsResults: z.array(z.any()), + secondaryMetricsResults: z.array(z.any()), + }) + .transform(({ experiment, exposures, primaryMetricsResults, secondaryMetricsResults }) => { + return { + experiment, + exposures, + metrics: { + primary: { + count: primaryMetricsResults.length, + results: primaryMetricsResults + .map((result, index) => ({ + index, + data: result, + })) + .filter((item) => item.data !== null), + }, + secondary: { + count: secondaryMetricsResults.length, + results: secondaryMetricsResults + .map((result, index) => ({ + index, + data: result, + })) + .filter((item) => item.data !== null), + }, + }, + }; + }); + +/** + * Schema for updating existing experiments + * All fields are optional to support partial updates + */ +export const ExperimentUpdatePayloadSchema = z + .object({ + name: z.string().optional(), + description: z.string().nullish(), + start_date: z.string().nullish(), + end_date: z.string().nullish(), + + // Parameters + parameters: z + .object({ + feature_flag_variants: z + .array( + z.object({ + key: z.string(), + name: z.string().optional(), + rollout_percentage: z.number(), + }), + ) + .optional(), + minimum_detectable_effect: z.number().nullish(), + recommended_running_time: z.number().nullish(), + recommended_sample_size: z.number().nullish(), + variant_screenshot_media_ids: z.record(z.array(z.string())).optional(), + }) + .optional(), + + // Metrics + metrics: z.array(ExperimentMetricSchema).optional(), + metrics_secondary: z.array(ExperimentMetricSchema).optional(), + primary_metrics_ordered_uuids: z.array(z.string()).nullish(), + secondary_metrics_ordered_uuids: z.array(z.string()).nullish(), + + // State 
management + archived: z.boolean().optional(), + conclusion: z.enum(ExperimentConclusion).nullish(), + conclusion_comment: z.string().nullish(), + + // Configuration + exposure_criteria: ExperimentExposureCriteriaSchema.optional(), + saved_metrics_ids: z.array(z.any()).nullish(), + stats_config: z.any().optional(), + }) + .strict(); + +export type ExperimentUpdatePayload = z.infer; diff --git a/typescript/src/schema/tool-inputs.ts b/typescript/src/schema/tool-inputs.ts index 01e77bc..7fe7296 100644 --- a/typescript/src/schema/tool-inputs.ts +++ b/typescript/src/schema/tool-inputs.ts @@ -56,6 +56,237 @@ export const ExperimentGetSchema = z.object({ experimentId: z.number().describe("The ID of the experiment to retrieve"), }); +export const ExperimentResultsGetSchema = z.object({ + experimentId: z.number().describe("The ID of the experiment to get comprehensive results for"), + refresh: z.boolean().describe("Force refresh of results instead of using cached values"), +}); + +export const ExperimentDeleteSchema = z.object({ + experimentId: z.number().describe("The ID of the experiment to delete"), +}); + +/** + * User-friendly input schema for experiment updates + * This provides a simplified interface that gets transformed to API format + */ +export const ExperimentUpdateInputSchema = z.object({ + name: z.string().optional().describe("Update experiment name"), + + description: z.string().optional().describe("Update experiment description"), + + // Primary metrics with guidance + primary_metrics: z + .array( + z.object({ + name: z.string().optional().describe("Human-readable metric name"), + metric_type: z + .enum(["mean", "funnel", "ratio"]) + .describe( + "Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics", + ), + event_name: z + .string() + .describe("PostHog event name (e.g., '$pageview', 'add_to_cart', 'purchase')"), + funnel_steps: z + .array(z.string()) + .optional() + .describe("For funnel metrics only: 
Array of event names for each funnel step"), + properties: z.record(z.any()).optional().describe("Event properties to filter on"), + description: z.string().optional().describe("What this metric measures"), + }), + ) + .optional() + .describe("Update primary metrics"), + + secondary_metrics: z + .array( + z.object({ + name: z.string().optional().describe("Human-readable metric name"), + metric_type: z.enum(["mean", "funnel", "ratio"]).describe("Metric type"), + event_name: z.string().describe("PostHog event name"), + funnel_steps: z + .array(z.string()) + .optional() + .describe("For funnel metrics only: Array of event names"), + properties: z.record(z.any()).optional().describe("Event properties to filter on"), + description: z.string().optional().describe("What this metric measures"), + }), + ) + .optional() + .describe("Update secondary metrics"), + + minimum_detectable_effect: z + .number() + .optional() + .describe("Update minimum detectable effect in percentage"), + + // Experiment state management + launch: z.boolean().optional().describe("Launch experiment (set start_date) or keep as draft"), + + conclude: z + .enum(["won", "lost", "inconclusive", "stopped_early", "invalid"]) + .optional() + .describe("Conclude experiment with result"), + + conclusion_comment: z.string().optional().describe("Comment about experiment conclusion"), + + restart: z + .boolean() + .optional() + .describe("Restart concluded experiment (clears end_date and conclusion)"), + + archive: z.boolean().optional().describe("Archive or unarchive experiment"), +}); + +export const ExperimentUpdateSchema = z.object({ + experimentId: z.number().describe("The ID of the experiment to update"), + data: ExperimentUpdateInputSchema.describe( + "The experiment data to update using user-friendly format", + ), +}); + +export const ExperimentCreateSchema = z.object({ + name: z + .string() + .min(1) + .describe("Experiment name - should clearly describe what is being tested"), + + description: z + 
.string() + .optional() + .describe( + "Detailed description of the experiment hypothesis, what changes are being tested, and expected outcomes", + ), + + feature_flag_key: z + .string() + .describe( + "Feature flag key (letters, numbers, hyphens, underscores only). IMPORTANT: First search for existing feature flags that might be suitable using the feature-flags-get-all tool, then suggest reusing existing ones or creating a new key based on the experiment name", + ), + + type: z + .enum(["product", "web"]) + .default("product") + .describe( + "Experiment type: 'product' for backend/API changes, 'web' for frontend UI changes", + ), + + // Primary metrics with guidance + primary_metrics: z + .array( + z.object({ + name: z.string().optional().describe("Human-readable metric name"), + metric_type: z + .enum(["mean", "funnel", "ratio"]) + .describe( + "Metric type: 'mean' for average values (revenue, time spent), 'funnel' for conversion flows, 'ratio' for comparing two metrics", + ), + event_name: z + .string() + .describe( + "REQUIRED for metrics to work: PostHog event name (e.g., '$pageview', 'add_to_cart', 'purchase'). For funnels, this is the first step. Use '$pageview' if unsure. Search project-property-definitions tool for available events.", + ), + funnel_steps: z + .array(z.string()) + .optional() + .describe( + "For funnel metrics only: Array of event names for each funnel step (e.g., ['product_view', 'add_to_cart', 'checkout', 'purchase'])", + ), + properties: z.record(z.any()).optional().describe("Event properties to filter on"), + description: z + .string() + .optional() + .describe( + "What this metric measures and why it's important for the experiment", + ), + }), + ) + .optional() + .describe( + "Primary metrics to measure experiment success. IMPORTANT: Each metric needs event_name to track data. For funnels, provide funnel_steps array with event names for each step. 
Ask user what events they track, or use project-property-definitions to find available events.", + ), + + // Secondary metrics for additional insights + secondary_metrics: z + .array( + z.object({ + name: z.string().optional().describe("Human-readable metric name"), + metric_type: z + .enum(["mean", "funnel", "ratio"]) + .describe( + "Metric type: 'mean' for average values, 'funnel' for conversion flows, 'ratio' for comparing two metrics", + ), + event_name: z + .string() + .describe("REQUIRED: PostHog event name. Use '$pageview' if unsure."), + funnel_steps: z + .array(z.string()) + .optional() + .describe("For funnel metrics only: Array of event names for each funnel step"), + properties: z.record(z.any()).optional().describe("Event properties to filter on"), + description: z.string().optional().describe("What this secondary metric measures"), + }), + ) + .optional() + .describe( + "Secondary metrics to monitor for potential side effects or additional insights. Each metric needs event_name.", + ), + + // Feature flag variants + variants: z + .array( + z.object({ + key: z + .string() + .describe("Variant key (e.g., 'control', 'variant_a', 'new_design')"), + name: z.string().optional().describe("Human-readable variant name"), + rollout_percentage: z + .number() + .min(0) + .max(100) + .describe("Percentage of users to show this variant"), + }), + ) + .optional() + .describe( + "Experiment variants. If not specified, defaults to 50/50 control/test split. Ask user how many variants they need and what each tests", + ), + + // Experiment parameters + minimum_detectable_effect: z + .number() + .default(30) + .describe( + "Minimum detectable effect in percentage. Lower values require more users but detect smaller changes. 
Suggest 20-30% for most experiments", + ), + + // Exposure and targeting + filter_test_accounts: z + .boolean() + .default(true) + .describe("Whether to filter out internal test accounts"), + + target_properties: z + .record(z.any()) + .optional() + .describe("Properties to target specific user segments (e.g., country, subscription type)"), + + // Control flags + draft: z + .boolean() + .default(true) + .describe( + "Create as draft (true) or launch immediately (false). Recommend draft for review first", + ), + + holdout_id: z + .number() + .optional() + .describe( + "Holdout group ID if this experiment should exclude users from other experiments", + ), +}); + export const FeatureFlagCreateSchema = z.object({ name: z.string(), key: z.string(), diff --git a/typescript/src/tools/experiments/create.ts b/typescript/src/tools/experiments/create.ts new file mode 100644 index 0000000..d5d39d2 --- /dev/null +++ b/typescript/src/tools/experiments/create.ts @@ -0,0 +1,44 @@ +import { ExperimentCreateSchema } from "@/schema/tool-inputs"; +import type { Context, ToolBase } from "@/tools/types"; +import type { z } from "zod"; + +const schema = ExperimentCreateSchema; + +type Params = z.infer; + +/** + * Create a comprehensive A/B test experiment with guided setup + * This tool helps users create well-configured experiments through conversation + */ +export const createExperimentHandler = async (context: Context, params: Params) => { + const projectId = await context.stateManager.getProjectId(); + + const result = await context.api.experiments({ projectId }).create(params); + + if (!result.success) { + throw new Error(`Failed to create experiment: ${result.error.message}`); + } + + const experiment = result.data; + const experimentWithUrl = { + ...experiment, + url: `${context.api.getProjectBaseUrl(projectId)}/experiments/${experiment.id}`, + }; + + return { + content: [ + { + type: "text", + text: JSON.stringify(experimentWithUrl, null, 2), + }, + ], + }; +}; + +const tool = 
(): ToolBase => ({ + name: "experiment-create", + schema, + handler: createExperimentHandler, +}); + +export default tool; diff --git a/typescript/src/tools/experiments/delete.ts b/typescript/src/tools/experiments/delete.ts new file mode 100644 index 0000000..a7063ec --- /dev/null +++ b/typescript/src/tools/experiments/delete.ts @@ -0,0 +1,31 @@ +import { ExperimentDeleteSchema } from "@/schema/tool-inputs"; +import type { Context, ToolBase } from "@/tools/types"; +import type { z } from "zod"; + +const schema = ExperimentDeleteSchema; + +type Params = z.infer; + +export const deleteHandler = async (context: Context, { experimentId }: Params) => { + const projectId = await context.stateManager.getProjectId(); + + const deleteResult = await context.api.experiments({ projectId }).delete({ + experimentId, + }); + + if (!deleteResult.success) { + throw new Error(`Failed to delete experiment: ${deleteResult.error.message}`); + } + + return { + content: [{ type: "text", text: JSON.stringify(deleteResult.data) }], + }; +}; + +const tool = (): ToolBase => ({ + name: "experiment-delete", + schema, + handler: deleteHandler, +}); + +export default tool; diff --git a/typescript/src/tools/experiments/get.ts b/typescript/src/tools/experiments/get.ts index a854775..524bc43 100644 --- a/typescript/src/tools/experiments/get.ts +++ b/typescript/src/tools/experiments/get.ts @@ -6,11 +6,11 @@ const schema = ExperimentGetSchema; type Params = z.infer; -export const getHandler = async (context: Context, params: Params) => { +export const getHandler = async (context: Context, { experimentId }: Params) => { const projectId = await context.stateManager.getProjectId(); const result = await context.api.experiments({ projectId }).get({ - experimentId: params.experimentId, + experimentId: experimentId, }); if (!result.success) { diff --git a/typescript/src/tools/experiments/getAll.ts b/typescript/src/tools/experiments/getAll.ts index e1a8842..4eb6361 100644 --- 
a/typescript/src/tools/experiments/getAll.ts +++ b/typescript/src/tools/experiments/getAll.ts @@ -10,9 +10,11 @@ export const getAllHandler = async (context: Context, _params: Params) => { const projectId = await context.stateManager.getProjectId(); const results = await context.api.experiments({ projectId }).list(); + if (!results.success) { throw new Error(`Failed to get experiments: ${results.error.message}`); } + return { content: [{ type: "text", text: JSON.stringify(results.data) }] }; }; diff --git a/typescript/src/tools/experiments/getResults.ts b/typescript/src/tools/experiments/getResults.ts new file mode 100644 index 0000000..b8b294d --- /dev/null +++ b/typescript/src/tools/experiments/getResults.ts @@ -0,0 +1,53 @@ +import { ExperimentResultsResponseSchema } from "@/schema/experiments"; +import { ExperimentResultsGetSchema } from "@/schema/tool-inputs"; +import type { Context, ToolBase } from "@/tools/types"; +import type { z } from "zod"; + +const schema = ExperimentResultsGetSchema; + +type Params = z.infer; + +/** + * Get experiment results including metrics and exposures data + * This tool fetches the experiment details and executes the necessary queries + * to get metrics results (both primary and secondary) and exposure data + */ +export const getResultsHandler = async (context: Context, params: Params) => { + const projectId = await context.stateManager.getProjectId(); + + const result = await context.api.experiments({ projectId }).getMetricResults({ + experimentId: params.experimentId, + refresh: params.refresh, + }); + + if (!result.success) { + throw new Error(`Failed to get experiment results: ${result.error.message}`); + } + + const { experiment, primaryMetricsResults, secondaryMetricsResults, exposures } = result.data; + + // Format the response using the schema + const parsedExperiment = ExperimentResultsResponseSchema.parse({ + experiment, + primaryMetricsResults, + secondaryMetricsResults, + exposures, + }); + + return { + content: [ + { 
+ type: "text", + text: JSON.stringify(parsedExperiment, null, 2), + }, + ], + }; +}; + +const tool = (): ToolBase => ({ + name: "experiment-results-get", + schema, + handler: getResultsHandler, +}); + +export default tool; diff --git a/typescript/src/tools/experiments/update.ts b/typescript/src/tools/experiments/update.ts new file mode 100644 index 0000000..1f8681c --- /dev/null +++ b/typescript/src/tools/experiments/update.ts @@ -0,0 +1,54 @@ +import { ExperimentUpdateTransformSchema } from "@/schema/experiments"; +import { ExperimentUpdateSchema } from "@/schema/tool-inputs"; +import { getToolDefinition } from "@/tools/toolDefinitions"; +import type { Context, Tool } from "@/tools/types"; +import type { z } from "zod"; + +const schema = ExperimentUpdateSchema; + +type Params = z.infer; + +export const updateHandler = async (context: Context, params: Params) => { + const { experimentId, data } = params; + const projectId = await context.stateManager.getProjectId(); + + // Transform the tool input to API payload format + const apiPayload = ExperimentUpdateTransformSchema.parse(data); + + const updateResult = await context.api.experiments({ projectId }).update({ + experimentId, + updateData: apiPayload, + }); + + if (!updateResult.success) { + throw new Error(`Failed to update experiment: ${updateResult.error.message}`); + } + + const experimentWithUrl = { + ...updateResult.data, + url: `${context.api.getProjectBaseUrl(projectId)}/experiments/${updateResult.data.id}`, + }; + + return { + content: [{ type: "text", text: JSON.stringify(experimentWithUrl, null, 2) }], + }; +}; + +const definition = getToolDefinition("experiment-update"); + +const tool = (): Tool => ({ + name: "experiment-update", + title: definition.title, + description: definition.description, + schema, + handler: updateHandler, + scopes: ["experiments:write"], + annotations: { + destructiveHint: false, + idempotentHint: true, + openWorldHint: true, + readOnlyHint: false, + }, +}); + +export default 
tool; diff --git a/typescript/src/tools/index.ts b/typescript/src/tools/index.ts index 601cd02..d4247df 100644 --- a/typescript/src/tools/index.ts +++ b/typescript/src/tools/index.ts @@ -32,9 +32,13 @@ import errorDetails from "./errorTracking/errorDetails"; // Error Tracking import listErrors from "./errorTracking/listErrors"; -import getExperiment from "./experiments/get"; // Experiments +import createExperiment from "./experiments/create"; +import deleteExperiment from "./experiments/delete"; +import getExperiment from "./experiments/get"; import getAllExperiments from "./experiments/getAll"; +import getExperimentResults from "./experiments/getResults"; +import updateExperiment from "./experiments/update"; import createInsight from "./insights/create"; import deleteInsight from "./insights/delete"; @@ -100,6 +104,10 @@ const TOOL_MAP: Record ToolBase> = { // Experiments "experiment-get-all": getAllExperiments, "experiment-get": getExperiment, + "experiment-results-get": getExperimentResults, + "experiment-create": createExperiment, + "experiment-delete": deleteExperiment, + "experiment-update": updateExperiment, // Insights "insights-get-all": getAllInsights, diff --git a/typescript/tests/api/client.integration.test.ts b/typescript/tests/api/client.integration.test.ts index 4a1fccc..23ec4fe 100644 --- a/typescript/tests/api/client.integration.test.ts +++ b/typescript/tests/api/client.integration.test.ts @@ -17,6 +17,7 @@ describe("API Client Integration Tests", { concurrent: false }, () => { featureFlags: [] as number[], insights: [] as number[], dashboards: [] as number[], + experiments: [] as number[], }; beforeAll(async () => { @@ -71,6 +72,18 @@ describe("API Client Integration Tests", { concurrent: false }, () => { } } createdResources.dashboards = []; + + // Clean up created experiments + for (const experimentId of createdResources.experiments) { + try { + await client.experiments({ projectId: testProjectId }).delete({ + experimentId, + }); + } catch 
(error) { + console.warn(`Failed to cleanup experiment ${experimentId}:`, error); + } + } + createdResources.experiments = []; }); describe.skip("Organizations API", () => { @@ -1094,4 +1107,545 @@ describe("API Client Integration Tests", { concurrent: false }, () => { } }); }); + + describe("Experiments API", () => { + // Helper function to create a test experiment + const createTestExperiment = async ( + options: { + name?: string; + description?: string; + featureFlagKey?: string; + type?: "product" | "web"; + draft?: boolean; + metrics?: Array<{ + name?: string; + metric_type: "mean" | "funnel" | "ratio"; + event_name?: string; + funnel_steps?: string[]; + properties?: Record; + description?: string; + }>; + } = {}, + ) => { + const timestamp = Date.now(); + const createResult = await client.experiments({ projectId: testProjectId }).create({ + name: options.name || `Test Experiment ${timestamp}`, + description: options.description || "Integration test experiment", + feature_flag_key: options.featureFlagKey || `test-exp-${timestamp}`, + type: options.type || "product", + primary_metrics: options.metrics + ? options.metrics.map((metric) => ({ + name: metric.name || "Test Metric", + metric_type: metric.metric_type, + event_name: metric.event_name || "$pageview", + funnel_steps: metric.funnel_steps, + properties: metric.properties || {}, + description: metric.description, + })) + : undefined, + variants: [ + { key: "control", name: "Control", rollout_percentage: 50 }, + { key: "test", name: "Test", rollout_percentage: 50 }, + ], + minimum_detectable_effect: 5, + filter_test_accounts: true, + draft: options.draft !== undefined ? 
options.draft : true, + }); + + expect(createResult.success).toBe(true); + + if (createResult.success) { + const experimentId = createResult.data.id; + createdResources.experiments.push(experimentId); + return createResult.data; + } + + throw new Error( + `Failed to create test experiment: ${(createResult as any).error?.message}`, + ); + }; + + it.skip("should list experiments", async () => { + const result = await client.experiments({ projectId: testProjectId }).list(); + + if (!result.success) { + console.error("List experiments failed:", result.error?.message); + } + + expect(result.success).toBe(true); + + if (result.success) { + expect(Array.isArray(result.data)).toBe(true); + for (const experiment of result.data) { + expect(experiment).toHaveProperty("id"); + expect(experiment).toHaveProperty("name"); + expect(experiment).toHaveProperty("feature_flag_key"); + expect(typeof experiment.id).toBe("number"); + expect(typeof experiment.name).toBe("string"); + expect(typeof experiment.feature_flag_key).toBe("string"); + } + } + }); + + it("should create, get, update experiment", async () => { + // Create a test experiment + const experiment = await createTestExperiment({ + name: "CRUD Test Experiment", + description: "Test experiment for CRUD operations", + }); + + // Get the created experiment + const getResult = await client + .experiments({ projectId: testProjectId }) + .get({ experimentId: experiment.id }); + + expect(getResult.success).toBe(true); + + if (getResult.success) { + expect(getResult.data.id).toBe(experiment.id); + expect(getResult.data.name).toBe("CRUD Test Experiment"); + expect(getResult.data.description).toBe("Test experiment for CRUD operations"); + expect(getResult.data.start_date).toBeNull(); // Should be draft + expect(getResult.data.archived).toBe(false); + } + + // Update the experiment + const updateResult = await client.experiments({ projectId: testProjectId }).update({ + experimentId: experiment.id, + updateData: { + name: "Updated CRUD 
Test Experiment", + description: "Updated description", + }, + }); + + expect(updateResult.success).toBe(true); + + if (updateResult.success) { + expect(updateResult.data.name).toBe("Updated CRUD Test Experiment"); + expect(updateResult.data.description).toBe("Updated description"); + } + + // Verify update persisted + const getUpdatedResult = await client + .experiments({ projectId: testProjectId }) + .get({ experimentId: experiment.id }); + + if (getUpdatedResult.success) { + expect(getUpdatedResult.data.name).toBe("Updated CRUD Test Experiment"); + expect(getUpdatedResult.data.description).toBe("Updated description"); + } + }); + + it("should create experiment with different metric types", async () => { + // Test mean metric + const meanExperiment = await createTestExperiment({ + name: "Mean Metric Test", + metrics: [ + { + name: "Page Views", + metric_type: "mean", + event_name: "$pageview", + description: "Average page views per user", + }, + ], + }); + + expect(meanExperiment.metrics).toHaveLength(1); + expect(meanExperiment.metrics?.[0]?.metric_type).toBe("mean"); + + // Test funnel metric + const funnelExperiment = await createTestExperiment({ + name: "Funnel Metric Test", + featureFlagKey: `funnel-test-${Date.now()}`, + metrics: [ + { + name: "Signup Funnel", + metric_type: "funnel", + event_name: "$pageview", + funnel_steps: ["$pageview", "sign_up_start", "sign_up_complete"], + description: "Signup conversion funnel", + }, + ], + }); + + expect(funnelExperiment.metrics).toHaveLength(1); + expect(funnelExperiment.metrics?.[0]?.metric_type).toBe("funnel"); + + // Test ratio metric + const ratioExperiment = await createTestExperiment({ + name: "Ratio Metric Test", + featureFlagKey: `ratio-test-${Date.now()}`, + metrics: [ + { + name: "Click-through Rate", + metric_type: "ratio", + event_name: "button_click", + description: "Button click rate", + }, + ], + }); + + expect(ratioExperiment.metrics).toHaveLength(1); + 
expect(ratioExperiment.metrics?.[0]?.metric_type).toBe("ratio"); + }); + + it("should handle experiment lifecycle - launch and archive", async () => { + const experiment = await createTestExperiment({ + name: "Lifecycle Test Experiment", + draft: true, + }); + + // Initially should be draft + expect(experiment.start_date).toBeNull(); + expect(experiment.archived).toBe(false); + + // Launch experiment + const launchResult = await client.experiments({ projectId: testProjectId }).update({ + experimentId: experiment.id, + updateData: { + start_date: new Date().toISOString(), + }, + }); + + expect(launchResult.success).toBe(true); + + if (launchResult.success) { + expect(launchResult.data.start_date).not.toBeNull(); + } + + // Archive experiment + const archiveResult = await client.experiments({ projectId: testProjectId }).update({ + experimentId: experiment.id, + updateData: { + archived: true, + }, + }); + + expect(archiveResult.success).toBe(true); + + if (archiveResult.success) { + expect(archiveResult.data.archived).toBe(true); + } + }); + + it.skip("should get experiment exposures for launched experiment", async () => { + // Create and launch experiment + const experiment = await createTestExperiment({ + name: "Exposure Test Experiment", + draft: false, // Create as launched + }); + + // Launch the experiment + await client.experiments({ projectId: testProjectId }).update({ + experimentId: experiment.id, + updateData: { + start_date: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(), // 1 day ago + }, + }); + + // Try to get exposures (may not have data immediately) + const exposureResult = await client + .experiments({ projectId: testProjectId }) + .getExposures({ + experimentId: experiment.id, + refresh: true, + }); + + // Should succeed even if no exposure data yet + expect(exposureResult.success).toBe(true); + + if (exposureResult.success) { + expect(exposureResult.data).toHaveProperty("exposures"); + expect(exposureResult.data.exposures).toBeDefined(); 
+ } + }); + + it("should fail to get exposures for draft experiment", async () => { + const experiment = await createTestExperiment({ + name: "Draft Exposure Test", + draft: true, + }); + + const exposureResult = await client + .experiments({ projectId: testProjectId }) + .getExposures({ + experimentId: experiment.id, + refresh: false, + }); + + expect(exposureResult.success).toBe(false); + expect((exposureResult as any).error.message).toContain("has not started yet"); + }); + + it.skip("should get experiment metric results for launched experiment", async () => { + // Create and launch experiment + const experiment = await createTestExperiment({ + name: "Metric Results Test", + draft: false, + }); + + // Launch the experiment + await client.experiments({ projectId: testProjectId }).update({ + experimentId: experiment.id, + updateData: { + start_date: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(), + }, + }); + + // Try to get metric results + const metricsResult = await client + .experiments({ projectId: testProjectId }) + .getMetricResults({ + experimentId: experiment.id, + refresh: true, + }); + + expect(metricsResult.success).toBe(true); + + if (metricsResult.success) { + expect(metricsResult.data).toHaveProperty("experiment"); + expect(metricsResult.data).toHaveProperty("primaryMetricsResults"); + expect(metricsResult.data).toHaveProperty("secondaryMetricsResults"); + expect(metricsResult.data).toHaveProperty("exposures"); + expect(metricsResult.data.experiment.id).toBe(experiment.id); + } + }); + + it("should fail to get metric results for draft experiment", async () => { + const experiment = await createTestExperiment({ + name: "Draft Metrics Test", + draft: true, + }); + + const metricsResult = await client + .experiments({ projectId: testProjectId }) + .getMetricResults({ + experimentId: experiment.id, + refresh: false, + }); + + expect(metricsResult.success).toBe(false); + expect((metricsResult as any).error.message).toContain("has not started 
yet"); + }); + + it("should handle invalid experiment ID", async () => { + const nonExistentId = 999999; + + const getResult = await client + .experiments({ projectId: testProjectId }) + .get({ experimentId: nonExistentId }); + + expect(getResult.success).toBe(false); + }); + + it("should create experiment with custom variants", async () => { + const experiment = await createTestExperiment({ + name: "Custom Variants Test", + }); + + // Verify default variants were created + expect(experiment.parameters?.feature_flag_variants).toHaveLength(2); + + // Update with custom variants + const updateResult = await client.experiments({ projectId: testProjectId }).update({ + experimentId: experiment.id, + updateData: { + parameters: { + feature_flag_variants: [ + { key: "control", rollout_percentage: 25 }, + { key: "variant_a", rollout_percentage: 25 }, + { key: "variant_b", rollout_percentage: 25 }, + { key: "variant_c", rollout_percentage: 25 }, + ], + }, + }, + }); + + expect(updateResult.success).toBe(true); + + if (updateResult.success) { + expect(updateResult.data.parameters?.feature_flag_variants).toHaveLength(4); + const variants = updateResult.data.parameters?.feature_flag_variants || []; + expect(variants.map((v) => v.key)).toEqual([ + "control", + "variant_a", + "variant_b", + "variant_c", + ]); + } + }); + + it("should delete experiment successfully", async () => { + // Create a test experiment to delete + const experiment = await createTestExperiment({ + name: "Delete Test Experiment", + description: "Test experiment for delete operations", + }); + + // Verify experiment exists before deletion + const getBeforeDelete = await client + .experiments({ projectId: testProjectId }) + .get({ experimentId: experiment.id }); + + expect(getBeforeDelete.success).toBe(true); + + if (getBeforeDelete.success) { + expect(getBeforeDelete.data.id).toBe(experiment.id); + expect(getBeforeDelete.data.name).toBe("Delete Test Experiment"); + } + + // Delete the experiment + const 
deleteResult = await client + .experiments({ projectId: testProjectId }) + .delete({ experimentId: experiment.id }); + + expect(deleteResult.success).toBe(true); + if (deleteResult.success) { + expect(deleteResult.data.success).toBe(true); + expect(deleteResult.data.message).toContain("successfully"); + } + + // Verify experiment is soft deleted (should return 404 or be marked as deleted) + const getAfterDelete = await client + .experiments({ projectId: testProjectId }) + .get({ experimentId: experiment.id }); + + // After soft delete, the API should return an error (404) or the experiment should be marked as deleted + expect(getAfterDelete.success).toBe(false); + }); + + it("should handle deleting non-existent experiment", async () => { + const nonExistentId = 999999999; + + const deleteResult = await client + .experiments({ projectId: testProjectId }) + .delete({ experimentId: nonExistentId }); + + // Should handle gracefully (either success with no-op or specific error) + // The exact behavior depends on the API implementation + expect(typeof deleteResult.success).toBe("boolean"); + }); + + it("should complete full CRUD workflow including delete", async () => { + const timestamp = Date.now(); + + // CREATE + const createResult = await client.experiments({ projectId: testProjectId }).create({ + name: `Full CRUD Test ${timestamp}`, + description: "Complete CRUD workflow test", + feature_flag_key: `full-crud-${timestamp}`, + type: "product", + primary_metrics: [ + { + name: "Test Conversion Rate", + metric_type: "funnel" as const, + event_name: "landing", + funnel_steps: ["landing", "signup", "activation"], + properties: {}, + }, + ], + variants: [ + { key: "control", name: "Control", rollout_percentage: 50 }, + { key: "variant", name: "Variant", rollout_percentage: 50 }, + ], + minimum_detectable_effect: 10, + filter_test_accounts: true, + draft: true, + }); + + expect(createResult.success).toBe(true); + + if (!createResult.success) { + throw new Error("Failed to 
create experiment for CRUD test"); + } + + const experimentId = createResult.data.id; + createdResources.experiments.push(experimentId); + + // READ + const getResult = await client + .experiments({ projectId: testProjectId }) + .get({ experimentId }); + + expect(getResult.success).toBe(true); + + if (getResult.success) { + expect(getResult.data.id).toBe(experimentId); + expect(getResult.data.name).toBe(`Full CRUD Test ${timestamp}`); + expect(getResult.data.description).toBe("Complete CRUD workflow test"); + } + + // UPDATE + const updateResult = await client.experiments({ projectId: testProjectId }).update({ + experimentId, + updateData: { + name: `Updated Full CRUD Test ${timestamp}`, + description: "Updated description for CRUD test", + }, + }); + + expect(updateResult.success).toBe(true); + + if (updateResult.success) { + expect(updateResult.data.name).toBe(`Updated Full CRUD Test ${timestamp}`); + expect(updateResult.data.description).toBe("Updated description for CRUD test"); + } + + // DELETE + const deleteResult = await client + .experiments({ projectId: testProjectId }) + .delete({ experimentId }); + + expect(deleteResult.success).toBe(true); + if (deleteResult.success) { + expect(deleteResult.data.success).toBe(true); + expect(deleteResult.data.message).toContain("successfully"); + } + + // Verify deletion worked + const getAfterDeleteResult = await client + .experiments({ projectId: testProjectId }) + .get({ experimentId }); + + expect(getAfterDeleteResult.success).toBe(false); + + // Remove from cleanup array since we already deleted it + const index = createdResources.experiments.indexOf(experimentId); + if (index > -1) { + createdResources.experiments.splice(index, 1); + } + }); + + it("should handle delete operations idempotently", async () => { + // Create experiment + const experiment = await createTestExperiment({ + name: "Idempotent Delete Test", + }); + + // First delete should succeed + const firstDeleteResult = await client + .experiments({ 
projectId: testProjectId }) + .delete({ experimentId: experiment.id }); + + expect(firstDeleteResult.success).toBe(true); + if (firstDeleteResult.success) { + expect(firstDeleteResult.data.success).toBe(true); + expect(firstDeleteResult.data.message).toContain("successfully"); + } + + // Second delete should handle gracefully (idempotent) + const secondDeleteResult = await client + .experiments({ projectId: testProjectId }) + .delete({ experimentId: experiment.id }); + + // Should not throw error, either success or specific "already deleted" error + expect(typeof secondDeleteResult.success).toBe("boolean"); + + // Remove from cleanup array since we already deleted it + const index = createdResources.experiments.indexOf(experiment.id); + if (index > -1) { + createdResources.experiments.splice(index, 1); + } + }); + }); }); diff --git a/typescript/tests/tools/experiments.integration.test.ts b/typescript/tests/tools/experiments.integration.test.ts new file mode 100644 index 0000000..2804760 --- /dev/null +++ b/typescript/tests/tools/experiments.integration.test.ts @@ -0,0 +1,1251 @@ +import { describe, it, expect, beforeAll, afterEach } from "vitest"; +import { + validateEnvironmentVariables, + createTestClient, + createTestContext, + setActiveProjectAndOrg, + cleanupResources, + TEST_PROJECT_ID, + TEST_ORG_ID, + type CreatedResources, + parseToolResponse, + generateUniqueKey, +} from "@/shared/test-utils"; +import createExperimentTool from "@/tools/experiments/create"; +import deleteExperimentTool from "@/tools/experiments/delete"; +import getAllExperimentsTool from "@/tools/experiments/getAll"; +import getExperimentTool from "@/tools/experiments/get"; +import getExperimentResultsTool from "@/tools/experiments/getResults"; +import updateExperimentTool from "@/tools/experiments/update"; +import type { Context } from "@/tools/types"; + +describe("Experiments", { concurrent: false }, () => { + let context: Context; + const createdResources: CreatedResources = { + 
featureFlags: [], + insights: [], + dashboards: [], + surveys: [], + }; + const createdExperiments: number[] = []; + + // Helper function to track created experiments and their feature flags + const trackExperiment = (experiment: any) => { + if (experiment.id) { + createdExperiments.push(experiment.id); + } + if (experiment.feature_flag?.id) { + createdResources.featureFlags.push(experiment.feature_flag.id); + } + }; + + beforeAll(async () => { + validateEnvironmentVariables(); + const client = createTestClient(); + context = createTestContext(client); + await setActiveProjectAndOrg(context, TEST_PROJECT_ID!, TEST_ORG_ID!); + }); + + afterEach(async () => { + // Clean up experiments first + for (const experimentId of createdExperiments) { + try { + await context.api.experiments({ projectId: TEST_PROJECT_ID! }).delete({ + experimentId, + }); + } catch (error) { + console.warn(`Failed to cleanup experiment ${experimentId}:`, error); + } + } + createdExperiments.length = 0; + + // Clean up associated feature flags + await cleanupResources(context.api, TEST_PROJECT_ID!, createdResources); + }); + + describe("create-experiment tool", () => { + const createTool = createExperimentTool(); + + it("should create a draft experiment with minimal required fields", async () => { + // Note: API auto-creates feature flag if it doesn't exist + const flagKey = generateUniqueKey("exp-flag"); + + // Create experiment + const params = { + name: "Minimal Test Experiment", + feature_flag_key: flagKey, + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.name).toBe(params.name); + expect(experiment.feature_flag_key).toBe(params.feature_flag_key); + expect(experiment.start_date).toBeNull(); // Draft experiments have no start date + expect(experiment.url).toContain("/experiments/"); + + trackExperiment(experiment); + }); + + it("should create an 
experiment with description and type", async () => { + const flagKey = generateUniqueKey("exp-flag-desc"); + + const params = { + name: "Detailed Test Experiment", + description: "This experiment tests the impact of button color on conversions", + feature_flag_key: flagKey, + type: "web" as const, + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.name).toBe(params.name); + expect(experiment.feature_flag_key).toBe(params.feature_flag_key); + + trackExperiment(experiment); + }); + + it("should create an experiment with custom variants", async () => { + const flagKey = generateUniqueKey("exp-flag-variants"); + + const params = { + name: "Variant Test Experiment", + feature_flag_key: flagKey, + variants: [ + { key: "control", name: "Control Group", rollout_percentage: 33 }, + { key: "variant_a", name: "Variant A", rollout_percentage: 33 }, + { key: "variant_b", name: "Variant B", rollout_percentage: 34 }, + ], + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.parameters?.feature_flag_variants).toHaveLength(3); + expect(experiment.parameters?.feature_flag_variants?.[0]?.key).toBe("control"); + expect(experiment.parameters?.feature_flag_variants?.[0]?.rollout_percentage).toBe(33); + + trackExperiment(experiment); + }); + + it("should create an experiment with mean metric", async () => { + const flagKey = generateUniqueKey("exp-flag-mean"); + + const params = { + name: "Mean Metric Experiment", + feature_flag_key: flagKey, + primary_metrics: [ + { + name: "Average Page Load Time", + metric_type: "mean" as const, + event_name: "$pageview", + properties: { page: "/checkout" }, + description: "Measure average page load time for checkout page", + }, + ], + draft: true, + }; + + const 
result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.metrics).toHaveLength(1); + + trackExperiment(experiment); + }); + + it("should create an experiment with funnel metric", async () => { + const flagKey = generateUniqueKey("exp-flag-funnel"); + + const params = { + name: "Funnel Metric Experiment", + feature_flag_key: flagKey, + primary_metrics: [ + { + name: "Checkout Conversion Funnel", + metric_type: "funnel" as const, + event_name: "product_view", + funnel_steps: ["product_view", "add_to_cart", "checkout_start", "purchase"], + description: "Track conversion through checkout funnel", + }, + ], + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.metrics).toHaveLength(1); + + trackExperiment(experiment); + }); + + it("should create an experiment with ratio metric", async () => { + const flagKey = generateUniqueKey("exp-flag-ratio"); + + const params = { + name: "Ratio Metric Experiment", + feature_flag_key: flagKey, + primary_metrics: [ + { + name: "Button Click Rate", + metric_type: "ratio" as const, + event_name: "button_click", + description: "Ratio of button clicks to page views", + }, + ], + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.metrics).toHaveLength(1); + + trackExperiment(experiment); + }); + + it("should create an experiment with multiple metrics", async () => { + const flagKey = generateUniqueKey("exp-flag-multi"); + + const params = { + name: "Multi Metric Experiment", + feature_flag_key: flagKey, + primary_metrics: [ + { + name: "Conversion Rate", + metric_type: "funnel" as const, + event_name: "visit", + funnel_steps: ["visit", "signup", 
"purchase"], + }, + { + name: "Average Revenue", + metric_type: "mean" as const, + event_name: "purchase", + }, + ], + secondary_metrics: [ + { + name: "Page Views", + metric_type: "mean" as const, + event_name: "$pageview", + }, + { + name: "Bounce Rate", + metric_type: "ratio" as const, + event_name: "bounce", + }, + ], + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.metrics).toHaveLength(2); + expect(experiment.metrics_secondary).toHaveLength(2); + + trackExperiment(experiment); + }); + + it("should create an experiment with minimum detectable effect", async () => { + const flagKey = generateUniqueKey("exp-flag-mde"); + + const params = { + name: "MDE Test Experiment", + feature_flag_key: flagKey, + minimum_detectable_effect: 15, + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + + trackExperiment(experiment); + }); + + it("should create an experiment with filter test accounts enabled", async () => { + const flagKey = generateUniqueKey("exp-flag-filter"); + + const params = { + name: "Filter Test Accounts Experiment", + feature_flag_key: flagKey, + filter_test_accounts: true, + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + + trackExperiment(experiment); + }); + + it("should create experiment when feature flag doesn't exist (API creates it)", async () => { + // Note: The API might auto-create the feature flag if it doesn't exist + const params = { + name: "Auto-Create Flag Experiment", + feature_flag_key: generateUniqueKey("auto-created-flag"), + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = 
parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + trackExperiment(experiment); + }); + }); + + describe("get-all-experiments tool", () => { + const createTool = createExperimentTool(); + const getAllTool = getAllExperimentsTool(); + + it("should list all experiments", async () => { + // Create a few test experiments + const testExperiments = []; + for (let i = 0; i < 3; i++) { + const flagKey = generateUniqueKey(`exp-list-flag-${i}`); + + const params = { + name: `List Test Experiment ${i}`, + feature_flag_key: flagKey, + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + testExperiments.push(experiment); + trackExperiment(experiment); + } + + // Get all experiments + const result = await getAllTool.handler(context, {}); + const allExperiments = parseToolResponse(result); + + expect(Array.isArray(allExperiments)).toBe(true); + expect(allExperiments.length).toBeGreaterThanOrEqual(3); + + // Verify our test experiments are in the list + for (const testExp of testExperiments) { + const found = allExperiments.find((e: any) => e.id === testExp.id); + expect(found).toBeDefined(); + } + }); + + it("should return experiments with proper structure", async () => { + const result = await getAllTool.handler(context, {}); + const experiments = parseToolResponse(result); + + if (experiments.length > 0) { + const experiment = experiments[0]; + expect(experiment).toHaveProperty("id"); + expect(experiment).toHaveProperty("name"); + expect(experiment).toHaveProperty("feature_flag_key"); + } + }); + }); + + describe("get-experiment tool", () => { + const createTool = createExperimentTool(); + const getTool = getExperimentTool(); + + it("should get experiment by ID", async () => { + // Create an experiment + const flagKey = generateUniqueKey("exp-get-flag"); + + const createParams = { + name: "Get Test Experiment", + description: "Test experiment for get operation", + 
feature_flag_key: flagKey, + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const createdExperiment = parseToolResponse(createResult); + trackExperiment(createdExperiment); + + // Get the experiment + const result = await getTool.handler(context, { experimentId: createdExperiment.id }); + const retrievedExperiment = parseToolResponse(result); + + expect(retrievedExperiment.id).toBe(createdExperiment.id); + expect(retrievedExperiment.name).toBe(createParams.name); + expect(retrievedExperiment.feature_flag_key).toBe(createParams.feature_flag_key); + }); + + it("should handle non-existent experiment ID", async () => { + const nonExistentId = 999999; + + await expect( + getTool.handler(context, { experimentId: nonExistentId }), + ).rejects.toThrow(); + }); + }); + + describe("get-experiment-results tool", () => { + const createTool = createExperimentTool(); + const getResultsTool = getExperimentResultsTool(); + + it("should fail for draft experiment (not started)", async () => { + // Create a draft experiment with metrics + const flagKey = generateUniqueKey("exp-metrics-flag"); + + const createParams = { + name: "Metrics Draft Experiment", + feature_flag_key: flagKey, + primary_metrics: [ + { + name: "Test Metric", + metric_type: "mean" as const, + event_name: "$pageview", + }, + ], + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + trackExperiment(experiment); + + // Try to get metric results for draft experiment + await expect( + getResultsTool.handler(context, { + experimentId: experiment.id, + refresh: false, + }), + ).rejects.toThrow(/has not started yet/); + }); + + it("should handle refresh parameter", async () => { + // Create an experiment with metrics + const flagKey = generateUniqueKey("exp-metrics-refresh-flag"); + + const createParams = { + name: "Metrics Refresh Test Experiment", + 
feature_flag_key: flagKey, + primary_metrics: [ + { + name: "Refresh Test Metric", + metric_type: "mean" as const, + event_name: "$pageview", + }, + ], + secondary_metrics: [ + { + name: "Secondary Refresh Metric", + metric_type: "ratio" as const, + event_name: "button_click", + }, + ], + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + trackExperiment(experiment); + + // Test with refresh=true (will still fail for draft, but tests parameter handling) + await expect( + getResultsTool.handler(context, { + experimentId: experiment.id, + refresh: true, + }), + ).rejects.toThrow(/has not started yet/); + }); + }); + + describe("Complex experiment workflows", () => { + const createTool = createExperimentTool(); + const getTool = getExperimentTool(); + const getAllTool = getAllExperimentsTool(); + + it("should support complete experiment creation and retrieval workflow", async () => { + // Create feature flag + const flagKey = generateUniqueKey("exp-workflow-flag"); + + // Create comprehensive experiment + const createParams = { + name: "Complete Workflow Experiment", + description: "Testing complete experiment workflow with all features", + feature_flag_key: flagKey, + type: "product" as const, + variants: [ + { key: "control", name: "Control", rollout_percentage: 50 }, + { key: "test", name: "Test Variant", rollout_percentage: 50 }, + ], + primary_metrics: [ + { + name: "Conversion Funnel", + metric_type: "funnel" as const, + event_name: "landing", + funnel_steps: ["landing", "signup", "activation"], + description: "Main conversion funnel", + }, + { + name: "Revenue per User", + metric_type: "mean" as const, + event_name: "purchase", + description: "Average revenue", + }, + ], + secondary_metrics: [ + { + name: "Engagement Rate", + metric_type: "ratio" as const, + event_name: "engagement", + description: "User engagement ratio", + }, + ], + 
minimum_detectable_effect: 20, + filter_test_accounts: true, + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const createdExperiment = parseToolResponse(createResult); + trackExperiment(createdExperiment); + + // Verify creation + expect(createdExperiment.id).toBeDefined(); + expect(createdExperiment.name).toBe(createParams.name); + expect(createdExperiment.parameters?.feature_flag_variants).toHaveLength(2); + expect(createdExperiment.metrics).toHaveLength(2); + expect(createdExperiment.metrics_secondary).toHaveLength(1); + + // Get the experiment + const getResult = await getTool.handler(context, { + experimentId: createdExperiment.id, + }); + const retrievedExperiment = parseToolResponse(getResult); + expect(retrievedExperiment.id).toBe(createdExperiment.id); + + // Verify it appears in list + const listResult = await getAllTool.handler(context, {}); + const allExperiments = parseToolResponse(listResult); + const found = allExperiments.find((e: any) => e.id === createdExperiment.id); + expect(found).toBeDefined(); + }); + + it("should create experiment with complex funnel metrics", async () => { + const flagKey = generateUniqueKey("exp-complex-funnel-flag"); + + const params = { + name: "Complex Funnel Experiment", + feature_flag_key: flagKey, + primary_metrics: [ + { + name: "E-commerce Full Funnel", + metric_type: "funnel" as const, + event_name: "home_page_view", + funnel_steps: [ + "home_page_view", + "product_list_view", + "product_detail_view", + "add_to_cart", + "checkout_start", + "payment_info_entered", + "order_completed", + ], + description: "Complete e-commerce conversion funnel", + }, + ], + secondary_metrics: [ + { + name: "Cart Abandonment Funnel", + metric_type: "funnel" as const, + event_name: "add_to_cart", + funnel_steps: ["add_to_cart", "checkout_start", "order_completed"], + description: "Track where users drop off in checkout", + }, + ], + draft: true, + }; + + const result = await 
createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.metrics).toHaveLength(1); + expect(experiment.metrics_secondary).toHaveLength(1); + + trackExperiment(experiment); + }); + + it("should create experiment with target properties", async () => { + const flagKey = generateUniqueKey("exp-target-props-flag"); + + const params = { + name: "Targeted Experiment", + feature_flag_key: flagKey, + target_properties: { + country: "US", + plan: "premium", + cohort: "early_adopters", + }, + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + + trackExperiment(experiment); + }); + + it("should create experiment without holdout group", async () => { + const flagKey = generateUniqueKey("exp-no-holdout-flag"); + + const params = { + name: "No Holdout Group Experiment", + feature_flag_key: flagKey, + // Not setting holdout_id (as it may not exist) + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + + trackExperiment(experiment); + }); + }); + + describe("Edge cases and error handling", () => { + const createTool = createExperimentTool(); + const getTool = getExperimentTool(); + const getResultsTool = getExperimentResultsTool(); + + it("should handle creating experiment without metrics", async () => { + const flagKey = generateUniqueKey("exp-no-metrics-flag"); + + const params = { + name: "No Metrics Experiment", + feature_flag_key: flagKey, + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.metrics || []).toHaveLength(0); + expect(experiment.metrics_secondary || []).toHaveLength(0); + + 
trackExperiment(experiment); + }); + + it("should handle invalid experiment ID in get operations", async () => { + const invalidId = 999999999; + + // Test get experiment + await expect(getTool.handler(context, { experimentId: invalidId })).rejects.toThrow(); + + // Test get metric results + await expect( + getResultsTool.handler(context, { + experimentId: invalidId, + refresh: false, + }), + ).rejects.toThrow(); + }); + + it("should handle variants with invalid rollout percentages", async () => { + const flagKey = generateUniqueKey("exp-invalid-rollout-flag"); + + const params = { + name: "Invalid Rollout Experiment", + feature_flag_key: flagKey, + variants: [ + { key: "control", rollout_percentage: 60 }, + { key: "test", rollout_percentage: 60 }, // Total > 100% + ], + draft: true, + }; + + // This might succeed or fail depending on API validation + // Just ensure it doesn't crash the test suite + try { + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + trackExperiment(experiment); + } catch (error) { + // Expected for invalid configuration + expect(error).toBeDefined(); + } + }); + + it("should handle metric with explicit event_name", async () => { + const flagKey = generateUniqueKey("exp-explicit-event-flag"); + + const params = { + name: "Explicit Event Name Experiment", + feature_flag_key: flagKey, + primary_metrics: [ + { + name: "Default Event Metric", + metric_type: "mean" as const, + event_name: "$pageview", // Explicit event_name since it's now required + }, + ], + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + expect(experiment.metrics).toHaveLength(1); + + trackExperiment(experiment); + }); + + it("should handle empty funnel steps array", async () => { + const flagKey = generateUniqueKey("exp-empty-funnel-flag"); + + const params = { + name: "Empty Funnel 
Steps Experiment", + feature_flag_key: flagKey, + primary_metrics: [ + { + name: "Empty Funnel", + metric_type: "funnel" as const, + funnel_steps: [], // Empty array + event_name: "$pageview", // Falls back to this + }, + ], + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.id).toBeDefined(); + + trackExperiment(experiment); + }); + + it("should handle very long experiment names", async () => { + const flagKey = generateUniqueKey("exp-long-name-flag"); + + const longName = "A".repeat(500); // Very long name + const params = { + name: longName, + feature_flag_key: flagKey, + draft: true, + }; + + try { + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + expect(experiment.id).toBeDefined(); + trackExperiment(experiment); + } catch (error) { + // Some APIs might reject very long names + expect(error).toBeDefined(); + } + }); + }); + + describe("delete-experiment tool", () => { + const createTool = createExperimentTool(); + const deleteTool = deleteExperimentTool(); + + it("should delete an existing experiment", async () => { + // Create experiment first + const flagKey = generateUniqueKey("exp-delete-flag"); + + const createParams = { + name: "Experiment to Delete", + feature_flag_key: flagKey, + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + expect(experiment.id).toBeDefined(); + trackExperiment(experiment); + + // Delete the experiment + const deleteParams = { experimentId: experiment.id }; + const deleteResult = await deleteTool.handler(context, deleteParams); + const deleteResponse = parseToolResponse(deleteResult); + + expect(deleteResponse.success).toBe(true); + expect(deleteResponse.message).toBe("Experiment deleted successfully"); + + // Remove from tracking since we deleted it 
manually + const index = createdExperiments.indexOf(experiment.id); + if (index > -1) { + createdExperiments.splice(index, 1); + } + + // Clean up the feature flag that was auto-created + if (experiment.feature_flag?.id) { + createdResources.featureFlags.push(experiment.feature_flag.id); + } + }); + + it("should handle invalid experiment ID", async () => { + const invalidId = 999999; + + const deleteParams = { experimentId: invalidId }; + + try { + await deleteTool.handler(context, deleteParams); + expect.fail("Should have thrown an error for invalid experiment ID"); + } catch (error) { + expect(error).toBeDefined(); + expect((error as Error).message).toContain("Failed to delete experiment"); + } + }); + + it("should handle already deleted experiment gracefully", async () => { + // Create experiment first + const flagKey = generateUniqueKey("exp-already-deleted-flag"); + + const createParams = { + name: "Experiment Already Deleted", + feature_flag_key: flagKey, + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + expect(experiment.id).toBeDefined(); + trackExperiment(experiment); + + // Delete the experiment twice + const deleteParams = { experimentId: experiment.id }; + + // First delete should succeed + const firstDeleteResult = await deleteTool.handler(context, deleteParams); + const firstDeleteResponse = parseToolResponse(firstDeleteResult); + expect(firstDeleteResponse.success).toBe(true); + + // Second delete should throw error (API returns 404 for already deleted) + try { + await deleteTool.handler(context, deleteParams); + expect.fail("Should have thrown an error for already deleted experiment"); + } catch (error) { + expect(error).toBeDefined(); + expect((error as Error).message).toContain("Failed to delete experiment"); + expect((error as Error).message).toContain("404"); + } + + // Remove from tracking since we deleted it manually + const index = 
createdExperiments.indexOf(experiment.id); + if (index > -1) { + createdExperiments.splice(index, 1); + } + + // Clean up the feature flag that was auto-created + if (experiment.feature_flag?.id) { + createdResources.featureFlags.push(experiment.feature_flag.id); + } + }); + + it("should validate required experimentId parameter", async () => { + try { + await deleteTool.handler(context, {} as any); + expect.fail("Should have thrown validation error for missing experimentId"); + } catch (error) { + expect(error).toBeDefined(); + } + }); + }); + + describe("update-experiment tool", () => { + const createTool = createExperimentTool(); + const updateTool = updateExperimentTool(); + + it("should update basic experiment fields", async () => { + // Create experiment first + const flagKey = generateUniqueKey("exp-update-basic-flag"); + + const createParams = { + name: "Original Name", + description: "Original description", + feature_flag_key: flagKey, + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + expect(experiment.id).toBeDefined(); + trackExperiment(experiment); + + // Update basic fields + const updateParams = { + experimentId: experiment.id, + data: { + name: "Updated Name", + description: "Updated description with new hypothesis", + }, + }; + + const updateResult = await updateTool.handler(context, updateParams); + const updatedExperiment = parseToolResponse(updateResult); + + expect(updatedExperiment.name).toBe("Updated Name"); + expect(updatedExperiment.description).toBe("Updated description with new hypothesis"); + expect(updatedExperiment.url).toContain("/experiments/"); + expect(updatedExperiment.start_date).toBeNull(); // Draft experiments have no start date + + trackExperiment(experiment); + }); + + it("should launch a draft experiment (draft → running)", async () => { + // Create draft experiment + const flagKey = generateUniqueKey("exp-launch-flag"); 
+ + const createParams = { + name: "Launch Test Experiment", + feature_flag_key: flagKey, + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + expect(experiment.start_date).toBeNull(); // Draft experiments have no start date + trackExperiment(experiment); + + // Launch the experiment + const launchParams = { + experimentId: experiment.id, + data: { + launch: true, + }, + }; + + const updateResult = await updateTool.handler(context, launchParams); + const launchedExperiment = parseToolResponse(updateResult); + + expect(launchedExperiment.start_date).toBeDefined(); // Running experiments have start date + expect(launchedExperiment.end_date).toBeNull(); // But no end date yet + + trackExperiment(experiment); + }); + + it("should stop a running experiment", async () => { + // Create and launch experiment + const flagKey = generateUniqueKey("exp-stop-flag"); + + const createParams = { + name: "Stop Test Experiment", + feature_flag_key: flagKey, + draft: false, // Create as launched + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + trackExperiment(experiment); + + // Stop the experiment + const stopParams = { + experimentId: experiment.id, + data: { + end_date: new Date().toISOString(), + conclusion: "stopped_early" as const, + conclusion_comment: "Test completed successfully", + }, + }; + + const updateResult = await updateTool.handler(context, stopParams); + const stoppedExperiment = parseToolResponse(updateResult); + + expect(stoppedExperiment.end_date).toBeDefined(); + // Note: API may not set conclusion field automatically, it depends on the backend implementation + // The important thing is that end_date is set, indicating the experiment is stopped + + trackExperiment(experiment); + }); + + it("should restart a concluded experiment", async () => { + // Create and conclude 
experiment + const flagKey = generateUniqueKey("exp-restart-flag"); + + const createParams = { + name: "Restart Test Experiment", + feature_flag_key: flagKey, + draft: false, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + trackExperiment(experiment); + + // First stop it + const stopParams = { + experimentId: experiment.id, + data: { + end_date: new Date().toISOString(), + conclusion: "inconclusive" as const, + conclusion_comment: "Need more data", + }, + }; + + await updateTool.handler(context, stopParams); + + // Now restart it (following restart workflow) + const restartParams = { + experimentId: experiment.id, + data: { + restart: true, + launch: true, + }, + }; + + const restartResult = await updateTool.handler(context, restartParams); + const restartedExperiment = parseToolResponse(restartResult); + + expect(restartedExperiment.end_date).toBeNull(); + expect(restartedExperiment.conclusion).toBeNull(); + expect(restartedExperiment.conclusion_comment).toBeNull(); + expect(restartedExperiment.start_date).toBeDefined(); // Restarted experiments have start date + expect(restartedExperiment.end_date).toBeNull(); // But no end date + + trackExperiment(experiment); + }); + + it("should restart experiment as draft", async () => { + // Create and conclude experiment + const flagKey = generateUniqueKey("exp-restart-draft-flag"); + + const createParams = { + name: "Restart as Draft Test", + feature_flag_key: flagKey, + draft: false, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + trackExperiment(experiment); + + // First conclude it + const concludeParams = { + experimentId: experiment.id, + data: { + conclude: "won" as const, + }, + }; + + await updateTool.handler(context, concludeParams); + + // Restart as draft (clear all completion fields including start_date) + const 
restartAsDraftParams = { + experimentId: experiment.id, + data: { + restart: true, + }, + }; + + const restartResult = await updateTool.handler(context, restartAsDraftParams); + const restartedExperiment = parseToolResponse(restartResult); + + expect(restartedExperiment.end_date).toBeNull(); + expect(restartedExperiment.conclusion).toBeNull(); + expect(restartedExperiment.start_date).toBeNull(); // Draft experiments have no start date + + trackExperiment(experiment); + }); + + it("should archive and unarchive experiment", async () => { + // Create experiment + const flagKey = generateUniqueKey("exp-archive-flag"); + + const createParams = { + name: "Archive Test Experiment", + feature_flag_key: flagKey, + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + trackExperiment(experiment); + + // Archive the experiment + const archiveParams = { + experimentId: experiment.id, + data: { + archive: true, + }, + }; + + const archiveResult = await updateTool.handler(context, archiveParams); + const archivedExperiment = parseToolResponse(archiveResult); + + expect(archivedExperiment.archived).toBe(true); + + // Unarchive the experiment + const unarchiveParams = { + experimentId: experiment.id, + data: { + archive: false, + }, + }; + + const unarchiveResult = await updateTool.handler(context, unarchiveParams); + const unarchivedExperiment = parseToolResponse(unarchiveResult); + + expect(unarchivedExperiment.archived).toBe(false); + + trackExperiment(experiment); + }); + + it("should update experiment minimum detectable effect", async () => { + // Create experiment with explicit variants + const flagKey = generateUniqueKey("exp-variants-flag"); + + const createParams = { + name: "Variants Update Test", + feature_flag_key: flagKey, + draft: true, + variants: [ + { key: "control", rollout_percentage: 50 }, + { key: "test", rollout_percentage: 50 }, + ], + }; + + const createResult = await
createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + trackExperiment(experiment); + + // Update minimum detectable effect + const updateParamsParams = { + experimentId: experiment.id, + data: { + minimum_detectable_effect: 25, + }, + }; + + const updateResult = await updateTool.handler(context, updateParamsParams); + const updatedExperiment = parseToolResponse(updateResult); + + expect(updatedExperiment.parameters?.minimum_detectable_effect).toBe(25); + + trackExperiment(experiment); + }); + + it("should handle invalid experiment ID", async () => { + const invalidId = 999999; + + const updateParams = { + experimentId: invalidId, + data: { + name: "This should fail", + }, + }; + + try { + await updateTool.handler(context, updateParams); + expect.fail("Should have thrown an error for invalid experiment ID"); + } catch (error) { + expect(error).toBeDefined(); + expect((error as Error).message).toContain("Failed to update experiment"); + } + }); + + it("should validate required experimentId parameter", async () => { + try { + await updateTool.handler(context, { data: { name: "Test" } } as any); + expect.fail("Should have thrown validation error for missing experimentId"); + } catch (error) { + expect(error).toBeDefined(); + } + }); + + it("should handle partial updates correctly", async () => { + // Create experiment + const flagKey = generateUniqueKey("exp-partial-flag"); + + const createParams = { + name: "Partial Update Test", + description: "Original description", + feature_flag_key: flagKey, + draft: true, + }; + + const createResult = await createTool.handler(context, createParams as any); + const experiment = parseToolResponse(createResult); + trackExperiment(experiment); + + // Update only name, leaving description unchanged + const updateParams = { + experimentId: experiment.id, + data: { + name: "Updated Name Only", + }, + }; + + const updateResult = await updateTool.handler(context, updateParams); + const 
updatedExperiment = parseToolResponse(updateResult); + + expect(updatedExperiment.name).toBe("Updated Name Only"); + // Description should remain unchanged + expect(updatedExperiment.description).toBe("Original description"); + + trackExperiment(experiment); + }); + }); + + describe("Experiment status handling", () => { + const createTool = createExperimentTool(); + + it("should correctly identify draft experiments", async () => { + const flagKey = generateUniqueKey("exp-draft-status-flag"); + + const params = { + name: "Draft Status Experiment", + feature_flag_key: flagKey, + draft: true, + }; + + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + expect(experiment.start_date).toBeNull(); // Draft experiments have no start date + + trackExperiment(experiment); + }); + + it("should handle immediate launch (non-draft) experiments", async () => { + const flagKey = generateUniqueKey("exp-immediate-launch-flag"); + + const params = { + name: "Immediate Launch Experiment", + feature_flag_key: flagKey, + draft: false, + }; + + try { + const result = await createTool.handler(context, params as any); + const experiment = parseToolResponse(result); + + // Check actual date fields instead of computed status + expect(experiment.start_date).toBeDefined(); // Should have start date if launched + + trackExperiment(experiment); + } catch (error) { + // Some environments might not allow immediate launch + expect(error).toBeDefined(); + } + }); + }); +});