From 68246a6114e107e41e118bdceaea8629dded501d Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Thu, 29 Jan 2026 08:20:21 -0500 Subject: [PATCH 01/14] refactor(worker): migrate amass and subfinder to Dynamic Args Pattern - Replace embedded shell scripts with clean shell wrapper pattern - Add buildAmassArgs() and buildSubfinderArgs() TypeScript functions - Use IsolatedContainerVolume for secure file I/O in both components - Add -silent flag to amass to prevent progress bar spam - Add passive mode parameter to amass (default: true for quick scans) - Add new parameters to subfinder: threads, timeout, rateLimit, etc. - Mount provider config as file instead of base64 env var in subfinder - Move output parsing from shell to TypeScript for both components - Update subfinder image to v2.12.0 Signed-off-by: Aseem Shrey --- worker/src/components/security/amass.ts | 618 +++++++++++--------- worker/src/components/security/subfinder.ts | 409 +++++++++---- 2 files changed, 649 insertions(+), 378 deletions(-) diff --git a/worker/src/components/security/amass.ts b/worker/src/components/security/amass.ts index c3d1a32a..8b7a1861 100644 --- a/worker/src/components/security/amass.ts +++ b/worker/src/components/security/amass.ts @@ -3,15 +3,29 @@ import { componentRegistry, ComponentRetryPolicy, runComponentWithRunner, - ServiceError, - ValidationError, + ContainerError, defineComponent, inputs, outputs, parameters, port, param, + type DockerRunnerConfig, } from '@shipsec/component-sdk'; +import { IsolatedContainerVolume } from '../../utils/isolated-volume'; + +const AMASS_IMAGE = 'owaspamass/amass:v5.0.1'; +const AMASS_TIMEOUT_SECONDS = (() => { + const raw = process.env.AMASS_TIMEOUT_SECONDS; + const parsed = raw ? Number.parseInt(raw, 10) : NaN; + if (!Number.isFinite(parsed) || Number.isNaN(parsed)) { + return 600; + } + return parsed; +})(); +const INPUT_MOUNT_NAME = 'inputs'; +const CONTAINER_INPUT_DIR = `/${INPUT_MOUNT_NAME}`; +const DOMAIN_FILE_NAME = 'domains.txt'; const inputSchema = inputs({ domains: port( @@ -28,6 +42,18 @@ const inputSchema = inputs({ }); const parameterSchema = parameters({ + passive: param( + z + .boolean() + .default(true) + .describe('Use passive mode only (no DNS queries, faster)'), + { + label: 'Passive Mode', + editor: 'boolean', + description: 'Skip DNS verification for faster execution (recommended for quick scans).', + helpText: 'Disable only if you need verified DNS records.', + }, + ), active: param( z .boolean() @@ -126,7 +152,7 @@ const parameterSchema = parameters({ label: 'Custom CLI Flags', editor: 'textarea', rows: 3, - placeholder: '--passive --config /work/config.yaml', + placeholder: '--config /work/config.yaml', description: 'Paste additional Amass CLI options exactly as you would on the command line.', helpText: 'Flags are appended after the generated options; avoid duplicating -d domain arguments.', @@ -192,6 +218,7 @@ const outputSchema = outputs({ }), options: port( z.object({ + passive: z.boolean(), active: z.boolean(), bruteForce: z.boolean(), includeIps: z.boolean(), @@ -213,14 +240,155 @@ const outputSchema = outputs({ ), }); -const dockerTimeoutSeconds = (() => { - const raw = process.env.AMASS_TIMEOUT_SECONDS; - const parsed = raw ? 
Number.parseInt(raw, 10) : NaN; - if (!Number.isFinite(parsed) || Number.isNaN(parsed)) { - return 600; +// Split custom CLI flags into an array of arguments +const splitCliArgs = (input: string): string[] => { + const args: string[] = []; + let current = ''; + let quote: '"' | "'" | null = null; + let escape = false; + + for (const ch of input) { + if (escape) { + current += ch; + escape = false; + continue; + } + + if (ch === '\\') { + escape = true; + continue; + } + + if (quote) { + if (ch === quote) { + quote = null; + } else { + current += ch; + } + continue; + } + + if (ch === '"' || ch === "'") { + quote = ch as '"' | "'"; + continue; + } + + if (/\s/.test(ch)) { + if (current.length > 0) { + args.push(current); + current = ''; + } + continue; + } + + current += ch; + } + + if (current.length > 0) { + args.push(current); + } + + return args; +}; + +interface BuildAmassArgsOptions { + domainFile: string; + passive: boolean; + active: boolean; + bruteForce: boolean; + enableAlterations: boolean; + recursive: boolean; + minForRecursive?: number; + maxDepth?: number; + dnsQueryRate?: number; + includeIps: boolean; + verbose: boolean; + demoMode: boolean; + timeoutMinutes?: number; + customFlags: string[]; +} + +/** + * Build Amass CLI arguments in TypeScript. + * This follows the Dynamic Args Pattern recommended in component-development.md + */ +const buildAmassArgs = (options: BuildAmassArgsOptions): string[] => { + const args: string[] = ['enum']; + + // CRITICAL: Always use -silent to prevent progress bar spam + // Without this, Amass outputs "0 / 1 [____]" to stderr hundreds of times per second + // This floods Loki and can cause system overload (see incident report) + args.push('-silent'); + + // Domain file input + args.push('-df', options.domainFile); + + // Passive mode - recommended for quick scans + if (options.passive) { + args.push('-passive'); + } + + // Active techniques (zone transfers, cert grabs) + if (options.active) { + args.push('-active'); } - return parsed; -})(); + + // Brute force + if (options.bruteForce) { + args.push('-brute'); + } + + // Alterations engine + if (options.enableAlterations) { + args.push('-alts'); + } + + // Include IP addresses + if (options.includeIps) { + args.push('-ip'); + } + + // Recursive brute forcing + if (!options.recursive) { + args.push('-norecursive'); + } else if (typeof options.minForRecursive === 'number' && options.minForRecursive >= 1) { + args.push('-min-for-recursive', String(options.minForRecursive)); + } + + // Max depth + if (typeof options.maxDepth === 'number' && options.maxDepth >= 1) { + args.push('-max-depth', String(options.maxDepth)); + } + + // DNS query rate + if (typeof options.dnsQueryRate === 'number' && options.dnsQueryRate >= 1) { + args.push('-dns-qps', String(options.dnsQueryRate)); + } + + // Timeout + if (typeof options.timeoutMinutes === 'number' && options.timeoutMinutes >= 1) { + args.push('-timeout', String(options.timeoutMinutes)); + } + + // Verbose + if (options.verbose) { + args.push('-v'); + } + + // Demo mode + if (options.demoMode) { + args.push('-demo'); + } + + // Custom flags (appended last) + for (const flag of options.customFlags) { + if (flag.length > 0) { + args.push(flag); + } + } + + return args; +}; // Retry policy for Amass - long-running subdomain enumeration const amassRetryPolicy: ComponentRetryPolicy = { @@ -238,219 +406,20 @@ const definition = defineComponent({ retryPolicy: amassRetryPolicy, runner: { kind: 'docker', - image: 'owaspamass/amass:v4.2.0', + image: AMASS_IMAGE, 
+ // IMPORTANT: Use shell wrapper for PTY compatibility + // Running CLI tools directly as entrypoint can cause them to hang with PTY (pseudo-terminal) + // The shell wrapper ensures proper TTY signal handling and clean exit + // See docs/component-development.md "Docker Entrypoint Pattern" for details entrypoint: 'sh', network: 'bridge', - timeoutSeconds: dockerTimeoutSeconds, - command: [ - '-c', - String.raw`set -eo pipefail - -export HOME=/tmp -mkdir -p "$HOME/.config/amass" - -INPUT=$(cat) - -DOMAINS_SECTION=$(printf "%s" "$INPUT" | tr -d '\n' | sed -n 's/.*"domains":[[:space:]]*\[\([^]]*\)\].*/\1/p') - -if [ -z "$DOMAINS_SECTION" ]; then - printf '{"subdomains":[],"rawOutput":"","domainCount":0,"subdomainCount":0,"options":{"active":false,"bruteForce":false,"includeIps":false,"enableAlterations":false,"recursive":true,"verbose":false,"demoMode":false,"timeoutMinutes":null,"minForRecursive":null,"maxDepth":null,"dnsQueryRate":null,"customFlags":null}}' - exit 0 -fi - -DOMAIN_LIST=$(printf "%s" "$DOMAINS_SECTION" | tr ',' '\n' | sed 's/"//g; s/^[[:space:]]*//; s/[[:space:]]*$//' | sed '/^$/d') - -if [ -z "$DOMAIN_LIST" ]; then - printf '{"subdomains":[],"rawOutput":"","domainCount":0,"subdomainCount":0,"options":{"active":false,"bruteForce":false,"includeIps":false,"enableAlterations":false,"recursive":true,"verbose":false,"demoMode":false,"timeoutMinutes":null,"minForRecursive":null,"maxDepth":null,"dnsQueryRate":null,"customFlags":null}}' - exit 0 -fi - -extract_bool() { - key="$1" - default="$2" - value=$(printf "%s" "$INPUT" | tr -d '\n' | grep -o "\"$key\":[[:space:]]*\\(true\\|false\\)" | head -n1 | sed 's/.*://; s/[[:space:]]//g') - if [ -z "$value" ]; then - value="$default" - fi - if [ "$value" = "true" ]; then - echo "true" - else - echo "false" - fi -} - -extract_number() { - key="$1" - value=$(printf "%s" "$INPUT" | tr -d '\n' | grep -o "\"$key\":[[:space:]]*[0-9][0-9]*" | head -n1 | sed 's/[^0-9]//g') - if [ -z "$value" ]; then - echo "" - else - echo "$value" - fi -} - -extract_string() { - key="$1" - printf "%s" "$INPUT" | tr '\n' ' ' | sed -n "s/.*\"$key\":[[:space:]]*\"\\([^\"]*\\)\".*/\\1/p" | head -n1 -} - -ACTIVE=$(extract_bool "active" "false") -BRUTE=$(extract_bool "bruteForce" "false") -INCLUDE_IPS=$(extract_bool "includeIps" "false") -ALTERATIONS=$(extract_bool "enableAlterations" "false") -RECURSIVE=$(extract_bool "recursive" "true") -VERBOSE=$(extract_bool "verbose" "false") -DEMO=$(extract_bool "demoMode" "false") -TIMEOUT=$(extract_number "timeoutMinutes") -MIN_FOR_RECUR=$(extract_number "minForRecursive") -MAX_DEPTH=$(extract_number "maxDepth") -DNS_QPS=$(extract_number "dnsQueryRate") -CUSTOM_FLAGS=$(extract_string "customFlags") - -if [ -n "$TIMEOUT" ]; then - TIMEOUT_JSON="$TIMEOUT" -else - TIMEOUT_JSON=null -fi - -if [ -n "$MIN_FOR_RECUR" ]; then - MIN_FOR_RECUR_JSON="$MIN_FOR_RECUR" -else - MIN_FOR_RECUR_JSON=null -fi - -if [ -n "$MAX_DEPTH" ]; then - MAX_DEPTH_JSON="$MAX_DEPTH" -else - MAX_DEPTH_JSON=null -fi - -if [ -n "$DNS_QPS" ]; then - DNS_QPS_JSON="$DNS_QPS" -else - DNS_QPS_JSON=null -fi - -if [ -n "$CUSTOM_FLAGS" ]; then - CUSTOM_FLAGS_JSON=$(printf '"%s"' "$(printf '%s' "$CUSTOM_FLAGS" | sed 's/\\/\\\\/g; s/"/\\"/g')") -else - CUSTOM_FLAGS_JSON=null -fi - -AMASS_FLAGS="" -if [ "$ACTIVE" = "true" ]; then - AMASS_FLAGS="$AMASS_FLAGS -active" -fi -if [ "$BRUTE" = "true" ]; then - AMASS_FLAGS="$AMASS_FLAGS -brute" -fi -if [ "$INCLUDE_IPS" = "true" ]; then - AMASS_FLAGS="$AMASS_FLAGS -ip" -fi -if [ "$ALTERATIONS" = "true" ]; then - 
AMASS_FLAGS="$AMASS_FLAGS -alts" -fi -if [ "$RECURSIVE" = "false" ]; then - AMASS_FLAGS="$AMASS_FLAGS -norecursive" -else - if [ -n "$MIN_FOR_RECUR" ]; then - AMASS_FLAGS="$AMASS_FLAGS -min-for-recursive $MIN_FOR_RECUR" - fi -fi -if [ -n "$TIMEOUT" ]; then - AMASS_FLAGS="$AMASS_FLAGS -timeout $TIMEOUT" -fi -if [ -n "$MAX_DEPTH" ]; then - AMASS_FLAGS="$AMASS_FLAGS -max-depth $MAX_DEPTH" -fi -if [ -n "$DNS_QPS" ]; then - AMASS_FLAGS="$AMASS_FLAGS -dns-qps $DNS_QPS" -fi -if [ "$VERBOSE" = "true" ]; then - AMASS_FLAGS="$AMASS_FLAGS -v" -fi -if [ "$DEMO" = "true" ]; then - AMASS_FLAGS="$AMASS_FLAGS -demo" -fi - -DOMAIN_ARGS="" -DOMAIN_COUNT=0 -for DOMAIN in $DOMAIN_LIST; do - if [ -n "$DOMAIN" ]; then - DOMAIN_ARGS="$DOMAIN_ARGS -d $DOMAIN" - DOMAIN_COUNT=$((DOMAIN_COUNT + 1)) - fi -done - -RAW_FILE=$(mktemp) -DEDUP_FILE=$(mktemp) -trap 'rm -f "$RAW_FILE" "$DEDUP_FILE"' EXIT - -if [ "$DOMAIN_COUNT" -eq 0 ]; then - printf '{"subdomains":[],"rawOutput":"","domainCount":0,"subdomainCount":0,"options":{"active":%s,"bruteForce":%s,"includeIps":%s,"enableAlterations":%s,"recursive":%s,"verbose":%s,"demoMode":%s,"timeoutMinutes":%s,"minForRecursive":%s,"maxDepth":%s,"dnsQueryRate":%s,"customFlags":%s}}' \ - "$ACTIVE" \ - "$BRUTE" \ - "$INCLUDE_IPS" \ - "$ALTERATIONS" \ - "$RECURSIVE" \ - "$VERBOSE" \ - "$DEMO" \ - "$TIMEOUT_JSON" \ - "$MIN_FOR_RECUR_JSON" \ - "$MAX_DEPTH_JSON" \ - "$DNS_QPS_JSON" \ - "$CUSTOM_FLAGS_JSON" - exit 0 -fi - -AMASS_COMMAND="/bin/amass enum $AMASS_FLAGS $DOMAIN_ARGS" -if [ -n "$CUSTOM_FLAGS" ]; then - AMASS_COMMAND="$AMASS_COMMAND $CUSTOM_FLAGS" -fi - -set +e -eval "$AMASS_COMMAND" >"$RAW_FILE" -STATUS=$? -set -e - -if [ $STATUS -ne 0 ] && [ ! -s "$RAW_FILE" ]; then - exit $STATUS -fi - -sed -e 's/\r//g' "$RAW_FILE" | grep -v '^\[' | awk '{print $1}' | sed '/^$/d' | sort -u > "$DEDUP_FILE" - -SUBDOMAIN_COUNT=$(wc -l < "$DEDUP_FILE" | tr -d ' ') - -if [ "$SUBDOMAIN_COUNT" -eq 0 ]; then - SUBDOMAIN_JSON="[]" -else - SUBDOMAIN_JSON=$(awk 'NR==1{printf("[\"%s\"", $0); next} {printf(",\"%s\"", $0)} END {if (NR==0) printf("[]"); else printf("]");}' "$DEDUP_FILE") -fi - -RAW_OUTPUT_ESCAPED=$(printf '%s' "$(cat "$RAW_FILE")" | sed ':a;N;$!ba;s/\\/\\\\/g; s/"/\\"/g; s/\n/\\n/g') - -printf '{"subdomains":%s,"rawOutput":"%s","domainCount":%d,"subdomainCount":%d,"options":{"active":%s,"bruteForce":%s,"includeIps":%s,"enableAlterations":%s,"recursive":%s,"verbose":%s,"demoMode":%s,"timeoutMinutes":%s,"minForRecursive":%s,"maxDepth":%s,"dnsQueryRate":%s,"customFlags":%s}}' \ - "$SUBDOMAIN_JSON" \ - "$RAW_OUTPUT_ESCAPED" \ - "$DOMAIN_COUNT" \ - "$SUBDOMAIN_COUNT" \ - "$ACTIVE" \ - "$BRUTE" \ - "$INCLUDE_IPS" \ - "$ALTERATIONS" \ - "$RECURSIVE" \ - "$VERBOSE" \ - "$DEMO" \ - "$TIMEOUT_JSON" \ - "$MIN_FOR_RECUR_JSON" \ - "$MAX_DEPTH_JSON" \ - "$DNS_QPS_JSON" \ - "$CUSTOM_FLAGS_JSON" -`, - ], + timeoutSeconds: AMASS_TIMEOUT_SECONDS, env: { - HOME: '/root', + HOME: '/tmp', }, + // Shell wrapper pattern: sh -c 'amass "$@"' -- [args...] 
+ // This allows dynamic args to be appended and properly passed to amass + command: ['-c', 'amass "$@"', '--'], }, inputs: inputSchema, outputs: outputSchema, @@ -482,33 +451,62 @@ printf '{"subdomains":%s,"rawOutput":"%s","domainCount":%d,"subdomainCount":%d," }, async execute({ inputs, params }, context) { const parsedParams = parameterSchema.parse(params); - const runnerPayload = { - ...inputs, - ...parsedParams, - }; + const { + passive, + active, + bruteForce, + enableAlterations, + recursive, + minForRecursive, + maxDepth, + dnsQueryRate, + includeIps, + verbose, + demoMode, + timeoutMinutes, + customFlags, + } = parsedParams; - const customFlags = - runnerPayload.customFlags && runnerPayload.customFlags.length > 0 - ? runnerPayload.customFlags - : null; + const trimmedCustomFlags = + typeof customFlags === 'string' && customFlags.length > 0 ? customFlags : null; + const customFlagArgs = trimmedCustomFlags ? splitCliArgs(trimmedCustomFlags) : []; const optionsSummary = { - active: parsedParams.active ?? false, - bruteForce: parsedParams.bruteForce ?? false, - enableAlterations: parsedParams.enableAlterations ?? false, - includeIps: parsedParams.includeIps ?? false, - recursive: parsedParams.recursive ?? true, - minForRecursive: parsedParams.minForRecursive ?? null, - maxDepth: parsedParams.maxDepth ?? null, - dnsQueryRate: parsedParams.dnsQueryRate ?? null, - verbose: parsedParams.verbose ?? false, - demoMode: parsedParams.demoMode ?? false, - timeoutMinutes: parsedParams.timeoutMinutes ?? null, - customFlags, + passive: passive ?? true, + active: active ?? false, + bruteForce: bruteForce ?? false, + enableAlterations: enableAlterations ?? false, + includeIps: includeIps ?? false, + recursive: recursive ?? true, + minForRecursive: minForRecursive ?? null, + maxDepth: maxDepth ?? null, + dnsQueryRate: dnsQueryRate ?? null, + verbose: verbose ?? false, + demoMode: demoMode ?? false, + timeoutMinutes: timeoutMinutes ?? null, + customFlags: trimmedCustomFlags, }; + // Normalize domains + const normalisedDomains = inputs.domains + .map((domain) => domain.trim()) + .filter((domain) => domain.length > 0); + + const domainCount = normalisedDomains.length; + + if (domainCount === 0) { + context.logger.info('[Amass] Skipping execution because no domains were provided.'); + return { + subdomains: [], + rawOutput: '', + domainCount: 0, + subdomainCount: 0, + options: optionsSummary, + }; + } + context.logger.info( - `[Amass] Enumerating ${inputs.domains.length} domain(s) with options: ${JSON.stringify(optionsSummary)}`, + `[Amass] Enumerating ${domainCount} domain(s) with options: ${JSON.stringify(optionsSummary)}`, ); context.emitProgress({ @@ -517,66 +515,134 @@ printf '{"subdomains":%s,"rawOutput":"%s","domainCount":%d,"subdomainCount":%d," data: { domains: inputs.domains, options: optionsSummary }, }); - const normalizedInput: (typeof inputSchema)['__inferred'] & - (typeof parameterSchema)['__inferred'] = { - ...runnerPayload, - customFlags: customFlags ?? undefined, - }; + // Extract tenant ID from context + const tenantId = (context as any).tenantId ?? 
'default-tenant'; - const result = await runComponentWithRunner( - definition.runner, - async () => ({}) as Output, - normalizedInput, - context, - ); + // Create isolated volume for this execution + const volume = new IsolatedContainerVolume(tenantId, context.runId); - if (typeof result === 'string') { - try { - const parsed = JSON.parse(result); - return outputSchema.parse(parsed); - } catch (error) { - context.logger.error(`[Amass] Failed to parse raw output: ${(error as Error).message}`); - throw new ServiceError('Amass returned unexpected raw output format', { - cause: error as Error, - details: { outputType: typeof result }, - }); - } + const baseRunner = definition.runner; + if (baseRunner.kind !== 'docker') { + throw new ContainerError('Amass runner is expected to be docker-based.', { + details: { expectedKind: 'docker', actualKind: baseRunner.kind }, + }); } - const parsed = outputSchema.safeParse(result); - if (!parsed.success) { - context.logger.error('[Amass] Output validation failed', parsed.error); - throw new ValidationError('Amass output validation failed', { - cause: parsed.error, - details: { issues: parsed.error.issues }, + let rawOutput: string; + try { + // Initialize volume with domain file + const volumeName = await volume.initialize({ + [DOMAIN_FILE_NAME]: normalisedDomains.join('\n'), + }); + context.logger.info(`[Amass] Created isolated volume: ${volumeName}`); + + // Build Amass arguments in TypeScript + const amassArgs = buildAmassArgs({ + domainFile: `${CONTAINER_INPUT_DIR}/${DOMAIN_FILE_NAME}`, + passive: passive ?? true, + active: active ?? false, + bruteForce: bruteForce ?? false, + enableAlterations: enableAlterations ?? false, + recursive: recursive ?? true, + minForRecursive, + maxDepth, + dnsQueryRate, + includeIps: includeIps ?? false, + verbose: verbose ?? false, + demoMode: demoMode ?? false, + timeoutMinutes, + customFlags: customFlagArgs, }); + + const runnerConfig: DockerRunnerConfig = { + kind: 'docker', + image: baseRunner.image, + network: baseRunner.network, + timeoutSeconds: baseRunner.timeoutSeconds ?? AMASS_TIMEOUT_SECONDS, + env: { ...(baseRunner.env ?? {}) }, + // Preserve the shell wrapper from baseRunner (sh -c 'amass "$@"' --) + entrypoint: baseRunner.entrypoint, + // Append amass arguments to shell wrapper command + command: [...(baseRunner.command ?? []), ...amassArgs], + volumes: [volume.getVolumeConfig(CONTAINER_INPUT_DIR, true)], + }; + + const result = await runComponentWithRunner( + runnerConfig, + async () => ({}) as Output, + { domains: inputs.domains }, + context, + ); + + // Get raw output (either string or from object) + if (typeof result === 'string') { + rawOutput = result; + } else if (result && typeof result === 'object' && 'rawOutput' in result) { + rawOutput = String((result as any).rawOutput ?? 
''); + } else { + rawOutput = ''; + } + } finally { + // Always cleanup the volume + await volume.cleanup(); + context.logger.info('[Amass] Cleaned up isolated volume'); } + // Parse output in TypeScript (not shell) + const lines = rawOutput + .trim() + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0); + + // Deduplicate subdomains - extract hostname from each line + // Amass output can include IP addresses or other data after the hostname + const subdomainSet = new Set( + lines + .map((line) => { + // Extract first token (hostname) + const tokens = line.split(/\s+/); + return tokens[0] || ''; + }) + .filter((host) => host.length > 0 && !host.startsWith('[')), + ); + const subdomains = Array.from(subdomainSet); + const subdomainCount = subdomains.length; + context.logger.info( - `[Amass] Found ${parsed.data.subdomainCount} unique subdomains across ${parsed.data.domainCount} domains`, + `[Amass] Found ${subdomainCount} unique subdomains across ${domainCount} domains`, ); - if (parsed.data.subdomainCount === 0) { + if (subdomainCount === 0) { context.emitProgress({ message: 'No subdomains discovered by Amass', level: 'warn', }); } else { context.emitProgress({ - message: `Amass discovered ${parsed.data.subdomainCount} subdomains`, + message: `Amass discovered ${subdomainCount} subdomains`, level: 'info', - data: { subdomains: parsed.data.subdomains.slice(0, 10) }, + data: { subdomains: subdomains.slice(0, 10) }, }); } - return parsed.data; + return { + subdomains, + rawOutput, + domainCount, + subdomainCount, + options: optionsSummary, + }; }, }); componentRegistry.register(definition); +// Internal type for execute function +type Output = (typeof outputSchema)['__inferred']; + // Create local type aliases for backward compatibility -type Input = typeof inputSchema; -type Output = typeof outputSchema; +type AmassInput = typeof inputSchema; +type AmassOutput = typeof outputSchema; -export type { Input as AmassInput, Output as AmassOutput }; +export type { AmassInput, AmassOutput }; diff --git a/worker/src/components/security/subfinder.ts b/worker/src/components/security/subfinder.ts index f0e18789..3d7c024d 100644 --- a/worker/src/components/security/subfinder.ts +++ b/worker/src/components/security/subfinder.ts @@ -14,6 +14,13 @@ import { } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; +const SUBFINDER_IMAGE = 'projectdiscovery/subfinder:v2.12.0'; +const SUBFINDER_TIMEOUT_SECONDS = 1800; // 30 minutes +const INPUT_MOUNT_NAME = 'inputs'; +const CONTAINER_INPUT_DIR = `/${INPUT_MOUNT_NAME}`; +const DOMAIN_FILE_NAME = 'domains.txt'; +const PROVIDER_CONFIG_FILE_NAME = 'provider-config.yaml'; + const domainValueSchema = z.preprocess( (val) => (typeof val === 'string' ? 
[val] : val), z.array(z.string().min(1)), @@ -47,6 +54,55 @@ const parameterSchema = parameters({ description: 'Legacy single-domain input (prefer Target Domains).', visibleWhen: { __legacy: true }, }), + threads: param(z.number().int().min(1).max(100).default(10), { + label: 'Threads', + editor: 'number', + min: 1, + max: 100, + description: 'Number of concurrent threads for subdomain enumeration.', + }), + timeout: param(z.number().int().min(1).max(300).default(30), { + label: 'Timeout (seconds)', + editor: 'number', + min: 1, + max: 300, + description: 'Timeout per source in seconds.', + }), + maxEnumerationTime: param(z.number().int().min(1).max(60).optional(), { + label: 'Max Enumeration Time (minutes)', + editor: 'number', + min: 1, + max: 60, + description: 'Maximum enumeration time in minutes (optional).', + }), + rateLimit: param(z.number().int().min(1).max(1000).optional(), { + label: 'Rate Limit', + editor: 'number', + min: 1, + max: 1000, + description: 'Maximum rate limit per source (requests per minute).', + }), + allSources: param(z.boolean().default(false), { + label: 'Use All Sources', + editor: 'boolean', + description: 'Use all available sources (slow but comprehensive).', + }), + recursive: param(z.boolean().default(false), { + label: 'Recursive Enumeration', + editor: 'boolean', + description: 'Enable recursive subdomain enumeration.', + }), + customFlags: param( + z.string().trim().optional().describe('Raw CLI flags to append to the subfinder command'), + { + label: 'Custom CLI Flags', + editor: 'textarea', + rows: 3, + placeholder: '-sources shodan,censys', + description: 'Paste additional subfinder CLI options exactly as you would on the command line.', + helpText: 'Flags are appended after the generated options.', + }, + ), }); const outputSchema = outputs({ @@ -68,7 +124,126 @@ const outputSchema = outputs({ }), }); -const SUBFINDER_TIMEOUT_SECONDS = 1800; // 30 minutes +// Split custom CLI flags into an array of arguments +const splitCliArgs = (input: string): string[] => { + const args: string[] = []; + let current = ''; + let quote: '"' | "'" | null = null; + let escape = false; + + for (const ch of input) { + if (escape) { + current += ch; + escape = false; + continue; + } + + if (ch === '\\') { + escape = true; + continue; + } + + if (quote) { + if (ch === quote) { + quote = null; + } else { + current += ch; + } + continue; + } + + if (ch === '"' || ch === "'") { + quote = ch as '"' | "'"; + continue; + } + + if (/\s/.test(ch)) { + if (current.length > 0) { + args.push(current); + current = ''; + } + continue; + } + + current += ch; + } + + if (current.length > 0) { + args.push(current); + } + + return args; +}; + +interface BuildSubfinderArgsOptions { + domainFile: string; + providerConfigFile?: string; + threads?: number; + timeout?: number; + maxEnumerationTime?: number; + rateLimit?: number; + allSources: boolean; + recursive: boolean; + customFlags: string[]; +} + +/** + * Build Subfinder CLI arguments in TypeScript. 
+ * This follows the Dynamic Args Pattern recommended in component-development.md + */ +const buildSubfinderArgs = (options: BuildSubfinderArgsOptions): string[] => { + const args: string[] = []; + + // Always use silent mode for clean output + args.push('-silent'); + + // Domain list file input + args.push('-dL', options.domainFile); + + // Provider config file (if provided) + if (options.providerConfigFile) { + args.push('-pc', options.providerConfigFile); + } + + // Thread count + if (typeof options.threads === 'number' && options.threads >= 1) { + args.push('-t', String(options.threads)); + } + + // Timeout per source + if (typeof options.timeout === 'number' && options.timeout >= 1) { + args.push('-timeout', String(options.timeout)); + } + + // Max enumeration time + if (typeof options.maxEnumerationTime === 'number' && options.maxEnumerationTime >= 1) { + args.push('-max-time', String(options.maxEnumerationTime)); + } + + // Rate limit + if (typeof options.rateLimit === 'number' && options.rateLimit >= 1) { + args.push('-rl', String(options.rateLimit)); + } + + // All sources + if (options.allSources) { + args.push('-all'); + } + + // Recursive enumeration + if (options.recursive) { + args.push('-recursive'); + } + + // Custom flags (appended last) + for (const flag of options.customFlags) { + if (flag.length > 0) { + args.push(flag); + } + } + + return args; +}; // Retry policy for Subfinder - long-running discovery operations const subfinderRetryPolicy: ComponentRetryPolicy = { @@ -86,31 +261,20 @@ const definition = defineComponent({ retryPolicy: subfinderRetryPolicy, runner: { kind: 'docker', - image: 'projectdiscovery/subfinder:v2.10.1', + image: SUBFINDER_IMAGE, + // IMPORTANT: Use shell wrapper for PTY compatibility + // Running CLI tools directly as entrypoint can cause them to hang with PTY (pseudo-terminal) + // The shell wrapper ensures proper TTY signal handling and clean exit + // See docs/component-development.md "Docker Entrypoint Pattern" for details entrypoint: 'sh', network: 'bridge', - command: [ - '-c', - String.raw`set -eo pipefail - -if [ -n "$SUBFINDER_PROVIDER_CONFIG_B64" ]; then - CONFIG_DIR="$HOME/.config/subfinder" - mkdir -p "$CONFIG_DIR" - printf '%s' "$SUBFINDER_PROVIDER_CONFIG_B64" | base64 -d > "$CONFIG_DIR/provider-config.yaml" -fi - -# NOTE: We intentionally DO NOT use the -json flag for subfinder -# Reason: Subfinder's -json outputs JSONL (one JSON per line), not a JSON array -# JSONL requires line-by-line parsing: output.split('\n').map(line => JSON.parse(line)) -# Plain text is simpler: output.split('\n').filter(line => line.length > 0) -# See docs/component-development.md "Output Format Selection" for details -subfinder -silent -dL /inputs/domains.txt 2>/dev/null || true -`, - ], timeoutSeconds: SUBFINDER_TIMEOUT_SECONDS, env: { HOME: '/root', }, + // Shell wrapper pattern: sh -c 'subfinder "$@"' -- [args...] 
+ // This allows dynamic args to be appended and properly passed to subfinder + command: ['-c', 'subfinder "$@"', '--'], }, inputs: inputSchema, outputs: outputSchema, @@ -140,13 +304,23 @@ subfinder -silent -dL /inputs/domains.txt 2>/dev/null || true ], }, async execute({ inputs, params }, context) { - const baseRunner = definition.runner; - if (baseRunner.kind !== 'docker') { - throw new ContainerError('Subfinder runner is expected to be docker-based.', { - details: { expectedKind: 'docker', actualKind: baseRunner.kind }, - }); - } + const parsedParams = parameterSchema.parse(params); + const { + domain: legacyDomain, + threads, + timeout, + maxEnumerationTime, + rateLimit, + allSources, + recursive, + customFlags, + } = parsedParams; + const trimmedCustomFlags = + typeof customFlags === 'string' && customFlags.length > 0 ? customFlags : null; + const customFlagArgs = trimmedCustomFlags ? splitCliArgs(trimmedCustomFlags) : []; + + // Collect domains from both inputs and legacy parameter const values = new Set(); const addValue = (value: string | string[] | undefined) => { if (Array.isArray(value)) { @@ -167,15 +341,18 @@ subfinder -silent -dL /inputs/domains.txt 2>/dev/null || true }; addValue(inputs.domains); - addValue(params.domain); + addValue(legacyDomain); const domains = Array.from(values); + const domainCount = domains.length; + const providerConfig = typeof inputs.providerConfig === 'string' && inputs.providerConfig.trim().length > 0 ? inputs.providerConfig : undefined; - if (domains.length === 0) { + if (domainCount === 0) { + context.logger.info('[Subfinder] Skipping execution because no domains were provided.'); return { subdomains: [], rawOutput: '', @@ -184,103 +361,131 @@ subfinder -silent -dL /inputs/domains.txt 2>/dev/null || true }; } + context.logger.info(`[Subfinder] Enumerating ${domainCount} domain(s)`); + context.emitProgress({ + message: `Launching Subfinder for ${domainCount} domain${domainCount === 1 ? '' : 's'}`, + level: 'info', + data: { domains }, + }); + + // Extract tenant ID from context const tenantId = (context as any).tenantId ?? 'default-tenant'; + + // Create isolated volume for this execution const volume = new IsolatedContainerVolume(tenantId, context.runId); - try { - await volume.initialize({ - 'domains.txt': domains.join('\n'), + const baseRunner = definition.runner; + if (baseRunner.kind !== 'docker') { + throw new ContainerError('Subfinder runner is expected to be docker-based.', { + details: { expectedKind: 'docker', actualKind: baseRunner.kind }, }); - context.logger.info(`[Subfinder] Created isolated volume for ${domains.length} domain(s).`); + } - const runnerConfig: DockerRunnerConfig = { - ...baseRunner, - env: { ...(baseRunner.env ?? {}) }, - volumes: [volume.getVolumeConfig('/inputs', true)], + let rawOutput: string; + try { + // Prepare input files for the volume + const inputFiles: Record = { + [DOMAIN_FILE_NAME]: domains.join('\n'), }; + // Add provider config file if provided if (providerConfig) { - const encoded = Buffer.from(providerConfig, 'utf8').toString('base64'); + inputFiles[PROVIDER_CONFIG_FILE_NAME] = providerConfig; + context.logger.info('[Subfinder] Provider configuration will be mounted.'); + } - runnerConfig.env = { - ...(runnerConfig.env ?? 
{}), - SUBFINDER_PROVIDER_CONFIG_B64: encoded, - }; + // Initialize the volume with input files + const volumeName = await volume.initialize(inputFiles); + context.logger.info(`[Subfinder] Created isolated volume: ${volumeName}`); - context.logger.info( - '[Subfinder] Provider configuration secret injected into runner environment.', - ); - } + // Build Subfinder arguments in TypeScript + const subfinderArgs = buildSubfinderArgs({ + domainFile: `${CONTAINER_INPUT_DIR}/${DOMAIN_FILE_NAME}`, + providerConfigFile: providerConfig + ? `${CONTAINER_INPUT_DIR}/${PROVIDER_CONFIG_FILE_NAME}` + : undefined, + threads: threads ?? 10, + timeout: timeout ?? 30, + maxEnumerationTime, + rateLimit, + allSources: allSources ?? false, + recursive: recursive ?? false, + customFlags: customFlagArgs, + }); + + const runnerConfig: DockerRunnerConfig = { + kind: 'docker', + image: baseRunner.image, + network: baseRunner.network, + timeoutSeconds: baseRunner.timeoutSeconds ?? SUBFINDER_TIMEOUT_SECONDS, + env: { ...(baseRunner.env ?? {}) }, + // Preserve the shell wrapper from baseRunner (sh -c 'subfinder "$@"' --) + entrypoint: baseRunner.entrypoint, + // Append subfinder arguments to shell wrapper command + command: [...(baseRunner.command ?? []), ...subfinderArgs], + volumes: [volume.getVolumeConfig(CONTAINER_INPUT_DIR, true)], + }; const result = await runComponentWithRunner( runnerConfig, - async () => ({}), + async () => ({}) as Output, { domains, providerConfig }, context, ); + // Get raw output (either string or from object) if (typeof result === 'string') { - const rawOutput = result; - const dedupedSubdomains = Array.from( - new Set( - rawOutput - .split('\n') - .map((line) => line.trim()) - .filter((line) => line.length > 0), - ), - ); - - return { - subdomains: dedupedSubdomains, - rawOutput, - domainCount: domains.length, - subdomainCount: dedupedSubdomains.length, - }; + rawOutput = result; + } else if (result && typeof result === 'object' && 'rawOutput' in result) { + rawOutput = String((result as any).rawOutput ?? ''); + } else { + rawOutput = ''; } + } finally { + // Always cleanup the volume + await volume.cleanup(); + context.logger.info('[Subfinder] Cleaned up isolated volume'); + } - if (result && typeof result === 'object') { - const parsed = outputSchema.safeParse(result); - if (parsed.success) { - return parsed.data; - } + // Parse output in TypeScript (not shell) + // NOTE: We intentionally DO NOT use the -json flag for subfinder + // Reason: Subfinder's -json outputs JSONL (one JSON per line), not a JSON array + // JSONL requires line-by-line parsing: output.split('\n').map(line => JSON.parse(line)) + // Plain text is simpler: output.split('\n').filter(line => line.length > 0) + const lines = rawOutput + .trim() + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0); - // Fallback: attempt to normalise unexpected object shapes - const maybeRaw = 'rawOutput' in result ? String((result as any).rawOutput ?? '') : ''; - const subdomainsValue = Array.isArray((result as any).subdomains) - ? ((result as any).subdomains as unknown[]) - .map((value) => (typeof value === 'string' ? value.trim() : String(value))) - .filter((value) => value.length > 0) - : maybeRaw - .split('\n') - .map((line) => line.trim()) - .filter((line) => line.length > 0); - - const output: Output = { - subdomains: subdomainsValue, - rawOutput: maybeRaw || subdomainsValue.join('\n'), - domainCount: - typeof (result as any).domainCount === 'number' - ? 
(result as any).domainCount - : domains.length, - subdomainCount: - typeof (result as any).subdomainCount === 'number' - ? (result as any).subdomainCount - : subdomainsValue.length, - }; - - return output; - } + // Deduplicate subdomains + const subdomainSet = new Set(lines); + const subdomains = Array.from(subdomainSet); + const subdomainCount = subdomains.length; - return { - subdomains: [], - rawOutput: '', - domainCount: domains.length, - subdomainCount: 0, - }; - } finally { - await volume.cleanup(); - context.logger.info('[Subfinder] Cleaned up isolated volume.'); + context.logger.info( + `[Subfinder] Found ${subdomainCount} unique subdomains across ${domainCount} domains`, + ); + + if (subdomainCount === 0) { + context.emitProgress({ + message: 'No subdomains discovered by Subfinder', + level: 'warn', + }); + } else { + context.emitProgress({ + message: `Subfinder discovered ${subdomainCount} subdomains`, + level: 'info', + data: { subdomains: subdomains.slice(0, 10) }, + }); } + + return { + subdomains, + rawOutput, + domainCount, + subdomainCount, + }; }, }); From 0da2eedc6fe801b8085a812c91101a30562d1139 Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Thu, 29 Jan 2026 09:11:14 -0500 Subject: [PATCH 02/14] perf(worker): optimize amass component for faster enumeration - Add default 15-minute timeout to prevent runaway scans - Add configurable DNS resolvers (Cloudflare, Google, Quad9 defaults) - Add configurable data sources, default to lightweight sources only - Exclude wayback/commoncrawl by default (can download 1GB+ per domain) - Disable recursive brute force by default for faster scans - Fix -src flag to -include (correct amass v5 syntax) These optimizations prevent system overload from excessive network I/O while maintaining useful subdomain enumeration capabilities. 
Signed-off-by: Aseem Shrey --- .../security/__tests__/amass.test.ts | 74 +++++----------- .../security/__tests__/subfinder.test.ts | 18 ++-- worker/src/components/security/amass.ts | 88 ++++++++++++++++--- worker/src/components/security/subfinder.ts | 3 +- 4 files changed, 113 insertions(+), 70 deletions(-) diff --git a/worker/src/components/security/__tests__/amass.test.ts b/worker/src/components/security/__tests__/amass.test.ts index 8e4cbd78..5f1973c6 100644 --- a/worker/src/components/security/__tests__/amass.test.ts +++ b/worker/src/components/security/__tests__/amass.test.ts @@ -27,21 +27,24 @@ describe('amass component', () => { const parsedParams = component.parameters!.parse(paramValues); + expect(parsedParams.passive).toBe(true); expect(parsedParams.active).toBe(false); expect(parsedParams.bruteForce).toBe(false); expect(parsedParams.includeIps).toBe(false); expect(parsedParams.enableAlterations).toBe(false); - expect(parsedParams.recursive).toBe(true); + expect(parsedParams.recursive).toBe(false); expect(parsedParams.verbose).toBe(false); expect(parsedParams.demoMode).toBe(false); - expect(parsedParams.timeoutMinutes).toBeUndefined(); + expect(parsedParams.timeoutMinutes).toBe(15); + expect(parsedParams.resolvers).toBe('1.1.1.1,8.8.8.8,9.9.9.9,8.8.4.4,1.0.0.1'); + expect(parsedParams.dataSources).toBe('crtsh,hackertarget'); expect(parsedParams.minForRecursive).toBeUndefined(); expect(parsedParams.maxDepth).toBeUndefined(); expect(parsedParams.dnsQueryRate).toBeUndefined(); expect(parsedParams.customFlags).toBeUndefined(); }); - it('should parse raw JSON response returned as string', async () => { + it('should parse raw text output from docker container', async () => { const component = componentRegistry.get('shipsec.amass.enum'); if (!component) throw new Error('Component not registered'); @@ -59,35 +62,18 @@ describe('amass component', () => { }, }; - const payload = JSON.stringify({ - subdomains: ['api.example.com'], - rawOutput: 'api.example.com', - domainCount: 1, - subdomainCount: 1, - options: { - active: true, - bruteForce: false, - includeIps: false, - enableAlterations: false, - recursive: true, - verbose: false, - demoMode: false, - timeoutMinutes: null, - minForRecursive: null, - maxDepth: null, - dnsQueryRate: null, - customFlags: null, - }, - }); - - vi.spyOn(sdk, 'runComponentWithRunner').mockResolvedValue(payload); + // Mock docker returning raw subdomain output (one per line) + vi.spyOn(sdk, 'runComponentWithRunner').mockResolvedValue('api.example.com\nwww.example.com'); const result = await component.execute(executePayload, context); - expect(result).toEqual(component.outputs.parse(JSON.parse(payload))); + expect(result.subdomains).toContain('api.example.com'); + expect(result.subdomains).toContain('www.example.com'); + expect(result.subdomainCount).toBe(2); + expect(result.domainCount).toBe(1); }); - it('should propagate structured output when docker returns JSON', async () => { + it('should handle structured object output from docker', async () => { const component = componentRegistry.get('shipsec.amass.enum'); if (!component) throw new Error('Component not registered'); @@ -107,31 +93,17 @@ describe('amass component', () => { }, }; - const payload = component.outputs.parse({ - subdomains: ['login.example.com', 'dev.example.org'], - rawOutput: 'login.example.com\nlogin.example.com 93.184.216.34\ndev.example.org', - domainCount: 2, - subdomainCount: 2, - options: { - active: false, - bruteForce: true, - includeIps: true, - enableAlterations: false, - recursive: 
true, - verbose: false, - demoMode: false, - timeoutMinutes: 2, - minForRecursive: null, - maxDepth: null, - dnsQueryRate: null, - customFlags: null, - }, - }); - - vi.spyOn(sdk, 'runComponentWithRunner').mockResolvedValue(payload); + // Mock docker returning raw output with IP addresses + const rawOutput = 'login.example.com 93.184.216.34\ndev.example.org'; + vi.spyOn(sdk, 'runComponentWithRunner').mockResolvedValue(rawOutput); const result = await component.execute(executePayload, context); - expect(result).toEqual(payload); + + expect(result.subdomains).toContain('login.example.com'); + expect(result.subdomains).toContain('dev.example.org'); + expect(result.subdomainCount).toBe(2); + expect(result.domainCount).toBe(2); + expect(result.rawOutput).toBe(rawOutput); }); it('should configure docker runner for owaspamass/amass image', () => { @@ -140,7 +112,7 @@ describe('amass component', () => { expect(component.runner.kind).toBe('docker'); if (component.runner.kind === 'docker') { - expect(component.runner.image).toBe('owaspamass/amass:v4.2.0'); + expect(component.runner.image).toBe('owaspamass/amass:v5.0.1'); expect(component.runner.entrypoint).toBe('sh'); expect(component.runner.command).toBeInstanceOf(Array); } diff --git a/worker/src/components/security/__tests__/subfinder.test.ts b/worker/src/components/security/__tests__/subfinder.test.ts index 46901281..9146e179 100644 --- a/worker/src/components/security/__tests__/subfinder.test.ts +++ b/worker/src/components/security/__tests__/subfinder.test.ts @@ -101,7 +101,7 @@ describe('subfinder component', () => { expect(params.domain).toBe('legacy.example.com'); }); - it('should inject provider config content into docker environment when configured', async () => { + it('should pass provider config via -pc flag when configured', async () => { const component = componentRegistry.get( 'shipsec.subfinder.run', ); @@ -137,9 +137,17 @@ describe('subfinder component', () => { const [runnerConfig] = runnerSpy.mock.calls[0]; expect(runnerConfig).toBeDefined(); if (runnerConfig && runnerConfig.kind === 'docker') { - expect(runnerConfig.env?.SUBFINDER_PROVIDER_CONFIG_B64).toBe( - Buffer.from(secretValue, 'utf8').toString('base64'), - ); + // After Dynamic Args Pattern refactoring, provider config is mounted as a file + // and passed via -pc flag in the command arguments + const command = runnerConfig.command ?? []; + expect(command).toContain('-pc'); + // The -pc flag should be followed by the path to the provider config file + const pcIndex = command.indexOf('-pc'); + expect(pcIndex).toBeGreaterThan(-1); + expect(command[pcIndex + 1]).toContain('provider-config.yaml'); + // Volume should be configured + expect(runnerConfig.volumes).toBeDefined(); + expect(runnerConfig.volumes?.length).toBeGreaterThan(0); } }); @@ -151,7 +159,7 @@ describe('subfinder component', () => { expect(component.runner.kind).toBe('docker'); if (component.runner.kind === 'docker') { - expect(component.runner.image).toBe('projectdiscovery/subfinder:v2.10.1'); + expect(component.runner.image).toBe('projectdiscovery/subfinder:v2.12.0'); } }); }); diff --git a/worker/src/components/security/amass.ts b/worker/src/components/security/amass.ts index 8b7a1861..0ef9608d 100644 --- a/worker/src/components/security/amass.ts +++ b/worker/src/components/security/amass.ts @@ -19,10 +19,20 @@ const AMASS_TIMEOUT_SECONDS = (() => { const raw = process.env.AMASS_TIMEOUT_SECONDS; const parsed = raw ? 
Number.parseInt(raw, 10) : NaN; if (!Number.isFinite(parsed) || Number.isNaN(parsed)) { - return 600; + return 900; // 15 minutes default } return parsed; })(); + +// Free data sources that don't require API keys (fast, lightweight) +// NOTE: wayback and commoncrawl are excluded - they return massive amounts of data +// and can choke the system with 1GB+ downloads even for a single domain +const DEFAULT_FREE_DATA_SOURCES = ['crtsh', 'hackertarget']; +const DEFAULT_DATA_SOURCES_STRING = DEFAULT_FREE_DATA_SOURCES.join(','); + +// Fast public DNS resolvers (Cloudflare, Google, Quad9) +const DEFAULT_RESOLVERS = ['1.1.1.1', '8.8.8.8', '9.9.9.9', '8.8.4.4', '1.0.0.1']; +const DEFAULT_RESOLVERS_STRING = DEFAULT_RESOLVERS.join(','); const INPUT_MOUNT_NAME = 'inputs'; const CONTAINER_INPUT_DIR = `/${INPUT_MOUNT_NAME}`; const DOMAIN_FILE_NAME = 'domains.txt'; @@ -43,10 +53,7 @@ const inputSchema = inputs({ const parameterSchema = parameters({ passive: param( - z - .boolean() - .default(true) - .describe('Use passive mode only (no DNS queries, faster)'), + z.boolean().default(true).describe('Use passive mode only (no DNS queries, faster)'), { label: 'Passive Mode', editor: 'boolean', @@ -87,13 +94,13 @@ const parameterSchema = parameters({ recursive: param( z .boolean() - .default(true) + .default(false) .describe('Allow recursive brute forcing when enough labels are discovered'), { label: 'Recursive Brute Force', editor: 'boolean', description: 'Allow recursive brute forcing when sufficient labels are discovered.', - helpText: 'Disable to keep enumeration shallow when DNS infrastructure is fragile.', + helpText: 'Enable for deeper enumeration. Keep disabled for faster, shallower scans.', }, ), minForRecursive: param( @@ -185,7 +192,7 @@ const parameterSchema = parameters({ .int() .positive() .max(360, 'Timeout larger than 6 hours is not supported') - .optional() + .default(15) .describe('Maximum enumeration runtime before Amass exits'), { label: 'Timeout (minutes)', @@ -194,7 +201,38 @@ const parameterSchema = parameters({ max: 360, description: 'Stop Amass after the specified number of minutes.', placeholder: '15', - helpText: 'Leave blank to allow Amass to run to completion.', + helpText: + 'Default is 15 minutes. Decrease for quick scans, increase for thorough enumeration.', + }, + ), + resolvers: param( + z + .string() + .trim() + .default(DEFAULT_RESOLVERS_STRING) + .describe('Comma-separated list of DNS resolvers to use'), + { + label: 'DNS Resolvers', + editor: 'text', + placeholder: '1.1.1.1,8.8.8.8,9.9.9.9', + description: 'Fast DNS resolvers for query resolution.', + helpText: + 'Default uses Cloudflare (1.1.1.1), Google (8.8.8.8), and Quad9 (9.9.9.9). Add custom resolvers if needed.', + }, + ), + dataSources: param( + z + .string() + .trim() + .default(DEFAULT_DATA_SOURCES_STRING) + .describe('Comma-separated list of data sources to query'), + { + label: 'Data Sources', + editor: 'text', + placeholder: 'crtsh,hackertarget', + description: 'Limit which data sources Amass queries (speeds up enumeration).', + helpText: + 'Default uses lightweight free sources. 
Add wayback,commoncrawl for more coverage (warning: very data-heavy).', }, ), }); @@ -230,6 +268,8 @@ const outputSchema = outputs({ minForRecursive: z.number().nullable(), maxDepth: z.number().nullable(), dnsQueryRate: z.number().nullable(), + resolvers: z.string().nullable(), + dataSources: z.string().nullable(), customFlags: z.string().nullable(), }), { @@ -305,6 +345,8 @@ interface BuildAmassArgsOptions { verbose: boolean; demoMode: boolean; timeoutMinutes?: number; + resolvers?: string; + dataSources?: string; customFlags: string[]; } @@ -370,6 +412,17 @@ const buildAmassArgs = (options: BuildAmassArgsOptions): string[] => { args.push('-timeout', String(options.timeoutMinutes)); } + // Data sources - limit which sources to query for faster enumeration + // Use -include flag (not -src) to specify which data sources to use + if (typeof options.dataSources === 'string' && options.dataSources.length > 0) { + args.push('-include', options.dataSources); + } + + // DNS resolvers - use fast public resolvers for better performance + if (typeof options.resolvers === 'string' && options.resolvers.length > 0) { + args.push('-r', options.resolvers); + } + // Verbose if (options.verbose) { args.push('-v'); @@ -464,6 +517,8 @@ const definition = defineComponent({ verbose, demoMode, timeoutMinutes, + resolvers, + dataSources, customFlags, } = parsedParams; @@ -471,19 +526,24 @@ const definition = defineComponent({ typeof customFlags === 'string' && customFlags.length > 0 ? customFlags : null; const customFlagArgs = trimmedCustomFlags ? splitCliArgs(trimmedCustomFlags) : []; + const effectiveDataSources = dataSources ?? DEFAULT_DATA_SOURCES_STRING; + const effectiveResolvers = resolvers ?? DEFAULT_RESOLVERS_STRING; + const optionsSummary = { passive: passive ?? true, active: active ?? false, bruteForce: bruteForce ?? false, enableAlterations: enableAlterations ?? false, includeIps: includeIps ?? false, - recursive: recursive ?? true, + recursive: recursive ?? false, minForRecursive: minForRecursive ?? null, maxDepth: maxDepth ?? null, dnsQueryRate: dnsQueryRate ?? null, verbose: verbose ?? false, demoMode: demoMode ?? false, - timeoutMinutes: timeoutMinutes ?? null, + timeoutMinutes: timeoutMinutes ?? 15, + resolvers: effectiveResolvers, + dataSources: effectiveDataSources, customFlags: trimmedCustomFlags, }; @@ -543,14 +603,16 @@ const definition = defineComponent({ active: active ?? false, bruteForce: bruteForce ?? false, enableAlterations: enableAlterations ?? false, - recursive: recursive ?? true, + recursive: recursive ?? false, minForRecursive, maxDepth, dnsQueryRate, includeIps: includeIps ?? false, verbose: verbose ?? false, demoMode: demoMode ?? false, - timeoutMinutes, + timeoutMinutes: timeoutMinutes ?? 
15, + resolvers: effectiveResolvers, + dataSources: effectiveDataSources, customFlags: customFlagArgs, }); diff --git a/worker/src/components/security/subfinder.ts b/worker/src/components/security/subfinder.ts index 3d7c024d..2b4c9257 100644 --- a/worker/src/components/security/subfinder.ts +++ b/worker/src/components/security/subfinder.ts @@ -99,7 +99,8 @@ const parameterSchema = parameters({ editor: 'textarea', rows: 3, placeholder: '-sources shodan,censys', - description: 'Paste additional subfinder CLI options exactly as you would on the command line.', + description: + 'Paste additional subfinder CLI options exactly as you would on the command line.', helpText: 'Flags are appended after the generated options.', }, ), From e820708c95cfc804ffb083e5c0513a6e82e3d783 Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Thu, 29 Jan 2026 09:35:16 -0500 Subject: [PATCH 03/14] fix(worker): preserve output on non-zero container exit Security tools like amass and subfinder can exit non-zero when some data sources fail or rate-limit, but still produce valid partial results. Previously, this would throw ContainerError and lose all output. Changes: - Include stdout in ContainerError details (runner.ts) - Catch ContainerError in amass/subfinder and extract partial output - Log warning when preserving partial results This restores the prior behavior where partial results were returned instead of failing the entire workflow. Signed-off-by: Aseem Shrey --- packages/component-sdk/src/runner.ts | 3 +- worker/src/components/security/amass.ts | 54 +++++++++++++++------ worker/src/components/security/subfinder.ts | 54 +++++++++++++++------ 3 files changed, 82 insertions(+), 29 deletions(-) diff --git a/packages/component-sdk/src/runner.ts b/packages/component-sdk/src/runner.ts index 6a98c38e..a969aaad 100644 --- a/packages/component-sdk/src/runner.ts +++ b/packages/component-sdk/src/runner.ts @@ -330,7 +330,7 @@ async function runDockerWithStandardIO( }); reject(new ContainerError(`Docker container failed with exit code ${code}: ${stderr}`, { - details: { exitCode: code, stderr, dockerArgs: formatArgs(dockerArgs) }, + details: { exitCode: code, stderr, stdout, dockerArgs: formatArgs(dockerArgs) }, })); return; } @@ -461,6 +461,7 @@ async function runDockerWithPty( { details: { exitCode, + stdout, dockerArgs: formatArgs(dockerArgs), }, }, diff --git a/worker/src/components/security/amass.ts b/worker/src/components/security/amass.ts index 0ef9608d..b0029aed 100644 --- a/worker/src/components/security/amass.ts +++ b/worker/src/components/security/amass.ts @@ -629,20 +629,46 @@ const definition = defineComponent({ volumes: [volume.getVolumeConfig(CONTAINER_INPUT_DIR, true)], }; - const result = await runComponentWithRunner( - runnerConfig, - async () => ({}) as Output, - { domains: inputs.domains }, - context, - ); - - // Get raw output (either string or from object) - if (typeof result === 'string') { - rawOutput = result; - } else if (result && typeof result === 'object' && 'rawOutput' in result) { - rawOutput = String((result as any).rawOutput ?? ''); - } else { - rawOutput = ''; + try { + const result = await runComponentWithRunner( + runnerConfig, + async () => ({}) as Output, + { domains: inputs.domains }, + context, + ); + + // Get raw output (either string or from object) + if (typeof result === 'string') { + rawOutput = result; + } else if (result && typeof result === 'object' && 'rawOutput' in result) { + rawOutput = String((result as any).rawOutput ?? 
''); + } else { + rawOutput = ''; + } + } catch (error) { + // Amass can exit non-zero when some data sources fail or rate-limit, + // even though it still printed valid findings. Preserve partial results + // instead of failing the entire workflow. + if (error instanceof ContainerError) { + const details = (error as any).details as Record | undefined; + const capturedStdout = details?.stdout; + if (typeof capturedStdout === 'string' && capturedStdout.trim().length > 0) { + context.logger.warn( + `[Amass] Container exited non-zero but produced output. Preserving partial results.`, + ); + context.emitProgress({ + message: 'Amass exited with errors but found some results', + level: 'warn', + data: { exitCode: details?.exitCode }, + }); + rawOutput = capturedStdout; + } else { + // No output captured - re-throw the original error + throw error; + } + } else { + throw error; + } } } finally { // Always cleanup the volume diff --git a/worker/src/components/security/subfinder.ts b/worker/src/components/security/subfinder.ts index 2b4c9257..d798af6b 100644 --- a/worker/src/components/security/subfinder.ts +++ b/worker/src/components/security/subfinder.ts @@ -427,20 +427,46 @@ const definition = defineComponent({ volumes: [volume.getVolumeConfig(CONTAINER_INPUT_DIR, true)], }; - const result = await runComponentWithRunner( - runnerConfig, - async () => ({}) as Output, - { domains, providerConfig }, - context, - ); - - // Get raw output (either string or from object) - if (typeof result === 'string') { - rawOutput = result; - } else if (result && typeof result === 'object' && 'rawOutput' in result) { - rawOutput = String((result as any).rawOutput ?? ''); - } else { - rawOutput = ''; + try { + const result = await runComponentWithRunner( + runnerConfig, + async () => ({}) as Output, + { domains, providerConfig }, + context, + ); + + // Get raw output (either string or from object) + if (typeof result === 'string') { + rawOutput = result; + } else if (result && typeof result === 'object' && 'rawOutput' in result) { + rawOutput = String((result as any).rawOutput ?? ''); + } else { + rawOutput = ''; + } + } catch (error) { + // Subfinder can exit non-zero when some sources fail or rate-limit, + // even though it still printed valid findings. Preserve partial results + // instead of failing the entire workflow. + if (error instanceof ContainerError) { + const details = (error as any).details as Record | undefined; + const capturedStdout = details?.stdout; + if (typeof capturedStdout === 'string' && capturedStdout.trim().length > 0) { + context.logger.warn( + `[Subfinder] Container exited non-zero but produced output. 
Preserving partial results.`, + ); + context.emitProgress({ + message: 'Subfinder exited with errors but found some results', + level: 'warn', + data: { exitCode: details?.exitCode }, + }); + rawOutput = capturedStdout; + } else { + // No output captured - re-throw the original error + throw error; + } + } else { + throw error; + } } } finally { // Always cleanup the volume From 8cf9e382f1af0ebe251ad84d2f5765298605013c Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Mon, 26 Jan 2026 23:27:59 -0500 Subject: [PATCH 04/14] feat(infra): add nginx reverse proxy and production security - Add nginx reverse proxy for unified entry point at http://localhost - Routes: / (frontend), /api (backend), /analytics (OpenSearch Dashboards) - Configure OpenSearch Dashboards with /analytics base path - Add production deployment with TLS and security plugin - SaaS multitenancy with per-customer tenant isolation - Certificate generation script (just generate-certs) - New commands: just dev, just prod-secure Signed-off-by: Aseem Shrey --- Dockerfile | 4 + docker/PRODUCTION.md | 223 ++++++++++++++++++ docker/README.md | 155 ++++++++++++ docker/certs/.gitignore | 7 + docker/docker-compose.full.yml | 112 ++++++++- docker/docker-compose.infra.yml | 84 +++++++ docker/docker-compose.prod.yml | 89 +++++++ docker/nginx/nginx.dev.conf | 189 +++++++++++++++ docker/nginx/nginx.full.conf | 185 +++++++++++++++ docker/nginx/nginx.prod.conf | 182 ++++++++++++++ docker/opensearch-dashboards.prod.yml | 57 +++++ docker/opensearch-dashboards.yml | 30 +++ docker/opensearch-init.sh | 69 ++++++ docker/opensearch-security/internal_users.yml | 61 +++++ docker/opensearch-security/roles.yml | 140 +++++++++++ docker/opensearch-security/roles_mapping.yml | 64 +++++ docker/opensearch-security/tenants.yml | 28 +++ docker/scripts/generate-certs.sh | 91 +++++++ justfile | 96 +++++++- pm2.config.cjs | 37 +++ 20 files changed, 1884 insertions(+), 19 deletions(-) create mode 100644 docker/PRODUCTION.md create mode 100644 docker/README.md create mode 100644 docker/certs/.gitignore create mode 100644 docker/docker-compose.prod.yml create mode 100644 docker/nginx/nginx.dev.conf create mode 100644 docker/nginx/nginx.full.conf create mode 100644 docker/nginx/nginx.prod.conf create mode 100644 docker/opensearch-dashboards.prod.yml create mode 100644 docker/opensearch-dashboards.yml create mode 100755 docker/opensearch-init.sh create mode 100644 docker/opensearch-security/internal_users.yml create mode 100644 docker/opensearch-security/roles.yml create mode 100644 docker/opensearch-security/roles_mapping.yml create mode 100644 docker/opensearch-security/tenants.yml create mode 100755 docker/scripts/generate-certs.sh diff --git a/Dockerfile b/Dockerfile index 77e0e3cb..dddabf89 100644 --- a/Dockerfile +++ b/Dockerfile @@ -89,6 +89,7 @@ ARG VITE_DEFAULT_ORG_ID=local-dev ARG VITE_GIT_SHA=unknown ARG VITE_PUBLIC_POSTHOG_KEY="" ARG VITE_PUBLIC_POSTHOG_HOST="" +ARG VITE_OPENSEARCH_DASHBOARDS_URL="" ENV VITE_AUTH_PROVIDER=${VITE_AUTH_PROVIDER} ENV VITE_CLERK_PUBLISHABLE_KEY=${VITE_CLERK_PUBLISHABLE_KEY} @@ -98,6 +99,7 @@ ENV VITE_DEFAULT_ORG_ID=${VITE_DEFAULT_ORG_ID} ENV VITE_GIT_SHA=${VITE_GIT_SHA} ENV VITE_PUBLIC_POSTHOG_KEY=${VITE_PUBLIC_POSTHOG_KEY} ENV VITE_PUBLIC_POSTHOG_HOST=${VITE_PUBLIC_POSTHOG_HOST} +ENV VITE_OPENSEARCH_DASHBOARDS_URL=${VITE_OPENSEARCH_DASHBOARDS_URL} # Set working directory for frontend USER shipsec @@ -126,6 +128,7 @@ ARG VITE_DEFAULT_ORG_ID=local-dev ARG VITE_GIT_SHA=unknown ARG VITE_PUBLIC_POSTHOG_KEY="" ARG 
VITE_PUBLIC_POSTHOG_HOST="" +ARG VITE_OPENSEARCH_DASHBOARDS_URL="" ENV VITE_AUTH_PROVIDER=${VITE_AUTH_PROVIDER} ENV VITE_CLERK_PUBLISHABLE_KEY=${VITE_CLERK_PUBLISHABLE_KEY} @@ -135,6 +138,7 @@ ENV VITE_DEFAULT_ORG_ID=${VITE_DEFAULT_ORG_ID} ENV VITE_GIT_SHA=${VITE_GIT_SHA} ENV VITE_PUBLIC_POSTHOG_KEY=${VITE_PUBLIC_POSTHOG_KEY} ENV VITE_PUBLIC_POSTHOG_HOST=${VITE_PUBLIC_POSTHOG_HOST} +ENV VITE_OPENSEARCH_DASHBOARDS_URL=${VITE_OPENSEARCH_DASHBOARDS_URL} # Set working directory for frontend USER shipsec diff --git a/docker/PRODUCTION.md b/docker/PRODUCTION.md new file mode 100644 index 00000000..dd5908d0 --- /dev/null +++ b/docker/PRODUCTION.md @@ -0,0 +1,223 @@ +# Production Deployment Guide + +This guide covers deploying the analytics infrastructure with security and SaaS multitenancy enabled. + +## Overview + +| Environment | Security | Multitenancy | Use Case | +|-------------|----------|--------------|----------| +| Development | Disabled | No | Local development, fast iteration | +| Production | Enabled | Yes (Strict) | Multi-tenant SaaS deployment | + +## SaaS Multitenancy Model + +**Key Principles:** +- Each customer gets complete data isolation by default +- No shared dashboards - sharing is explicitly opt-in +- Each customer has their own index pattern (`{customer_id}-*`) +- Tenants, roles, and users are created dynamically via backend + +**Index Naming Convention:** +``` +{customer_id}-analytics-* # Analytics data +{customer_id}-workflows-* # Workflow results +{customer_id}-scans-* # Scan results +``` + +## Quick Start (Production) + +```bash +# 1. Generate TLS certificates +./scripts/generate-certs.sh + +# 2. Set required environment variables +export OPENSEARCH_ADMIN_PASSWORD="your-secure-admin-password" +export OPENSEARCH_DASHBOARDS_PASSWORD="your-secure-dashboards-password" + +# 3. Start with production configuration +docker compose -f docker-compose.infra.yml -f docker-compose.prod.yml up -d +``` + +## Files Overview + +| File | Purpose | +|------|---------| +| `docker-compose.infra.yml` | Base infrastructure (dev mode, PM2 on host) | +| `docker-compose.full.yml` | Full stack containerized (simple prod, no security) | +| `docker-compose.prod.yml` | Security overlay (combines with infra.yml for SaaS) | +| `nginx/nginx.dev.conf` | Nginx routing to host (PM2 services) | +| `nginx/nginx.prod.conf` | Nginx routing to containers | +| `opensearch-dashboards.yml` | Dashboards config (dev) | +| `opensearch-dashboards.prod.yml` | Dashboards config (prod with multitenancy) | +| `scripts/generate-certs.sh` | TLS certificate generator | +| `opensearch-security/` | Security plugin configuration | +| `certs/` | Generated certificates (gitignored) | + +See [README.md](README.md) for detailed usage of each compose file. + +## Customer Provisioning (Backend Integration) + +When a new customer is onboarded, the backend must create: + +### 1. Create Customer Tenant +```bash +PUT /_plugins/_security/api/tenants/{customer_id} +{ + "description": "Tenant for customer {customer_id}" +} +``` + +### 2. Create Customer Role (with Index Isolation) +```bash +PUT /_plugins/_security/api/roles/customer_{customer_id}_rw +{ + "cluster_permissions": ["cluster_composite_ops_ro"], + "index_permissions": [{ + "index_patterns": ["{customer_id}-*"], + "allowed_actions": ["read", "write", "create_index", "indices:data/read/*", "indices:data/write/*"] + }], + "tenant_permissions": [{ + "tenant_patterns": ["{customer_id}"], + "allowed_actions": ["kibana_all_write"] + }] +} +``` + +### 3. 
Create Customer User +```bash +PUT /_plugins/_security/api/internalusers/{user_email} +{ + "password": "hashed_password", + "backend_roles": ["customer_{customer_id}"], + "attributes": { + "customer_id": "{customer_id}", + "email": "{user_email}" + } +} +``` + +### 4. Map User to Role +```bash +PUT /_plugins/_security/api/rolesmapping/customer_{customer_id}_rw +{ + "users": ["{user_email}"], + "backend_roles": ["customer_{customer_id}"] +} +``` + +## Security Configuration + +### TLS Certificates + +The `scripts/generate-certs.sh` script generates: + +- **root-ca.pem** - Root certificate authority +- **node.pem / node-key.pem** - OpenSearch node certificate +- **admin.pem / admin-key.pem** - Admin certificate for cluster management + +For production: +- Use a proper CA (Let's Encrypt, internal PKI) +- Store private keys in a secrets manager (Vault, AWS Secrets Manager) +- Set up certificate rotation before expiration + +### System Users + +Only two system users are defined (in `internal_users.yml`): + +| User | Purpose | +|------|---------| +| `admin` | Platform operations - DO NOT give to customers | +| `kibanaserver` | Dashboards backend communication | + +Customer users are created dynamically via the Security REST API. + +### Password Hashing + +Generate password hashes for users: +```bash +docker run -it opensearchproject/opensearch:2.11.1 \ + /usr/share/opensearch/plugins/opensearch-security/tools/hash.sh -p YOUR_PASSWORD +``` + +## Data Isolation Verification + +After setting up a customer, verify isolation: + +```bash +# As customer user - should only see their data +curl -u user@customer.com:password \ + "https://localhost:9200/{customer_id}-*/_search" + +# Should NOT be able to access other customer's data (403 Forbidden) +curl -u user@customer.com:password \ + "https://localhost:9200/other_customer-*/_search" +``` + +## Environment Variables + +| Variable | Required | Description | +|----------|----------|-------------| +| `OPENSEARCH_ADMIN_PASSWORD` | Yes | Admin user password | +| `OPENSEARCH_DASHBOARDS_PASSWORD` | Yes | kibanaserver user password | + +## Updating Security Configuration + +After modifying security files, apply changes: + +```bash +docker exec -it shipsec-opensearch \ + /usr/share/opensearch/plugins/opensearch-security/tools/securityadmin.sh \ + -cd /usr/share/opensearch/config/opensearch-security \ + -icl -nhnv \ + -cacert /usr/share/opensearch/config/certs/root-ca.pem \ + -cert /usr/share/opensearch/config/certs/admin.pem \ + -key /usr/share/opensearch/config/certs/admin-key.pem +``` + +## Troubleshooting + +### Container fails to start + +Check logs: +```bash +docker logs shipsec-opensearch +docker logs shipsec-opensearch-dashboards +``` + +Common issues: +- Certificate permissions (should be 600 for keys, 644 for certs) +- Missing environment variables +- Incorrect certificate paths + +### Cannot connect to secured cluster + +```bash +# Test with curl +curl -k -u admin:PASSWORD https://localhost:9200/_cluster/health +``` + +### Customer cannot see their dashboards + +1. Verify tenant was created for customer +2. Check user has correct backend_roles +3. Verify role has correct tenant_permissions +4. Check index pattern matches customer's indices + +### Cross-tenant data leak + +If a customer can see another customer's data: +1. Verify index_patterns in role are correctly scoped to `{customer_id}-*` +2. Check role mapping is correct +3. 
Ensure user's backend_roles match their customer ID + +## Switching Between Environments + +**Development (no security):** +```bash +docker compose -f docker-compose.infra.yml up -d +``` + +**Production (with security):** +```bash +docker compose -f docker-compose.infra.yml -f docker-compose.prod.yml up -d +``` diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 00000000..6b1d9a5c --- /dev/null +++ b/docker/README.md @@ -0,0 +1,155 @@ +# Docker Configuration + +This directory contains Docker Compose configurations for running ShipSec Studio in different environments. + +## Docker Compose Files + +| File | Purpose | When to Use | +|------|---------|-------------| +| `docker-compose.infra.yml` | Infrastructure services only | Development with PM2 (frontend/backend on host) | +| `docker-compose.full.yml` | Full stack in containers | Self-hosted deployment, all services containerized | +| `docker-compose.prod.yml` | Security overlay | Production SaaS with multitenancy (overlays infra.yml) | + +## Environment Modes + +### Development Mode (`just dev`) + +```bash +just dev +``` + +- **Compose file**: `docker-compose.infra.yml` +- **Frontend/Backend**: Run via PM2 on host machine +- **Infrastructure**: Runs in Docker (Postgres, Redis, Temporal, OpenSearch, etc.) +- **Nginx**: Uses `nginx.dev.conf` pointing to `host.docker.internal` +- **Security**: Disabled for fast iteration + +**Access:** +- Frontend: http://localhost:5173 +- Backend: http://localhost:3211 +- Analytics: http://localhost:5601/analytics/ + +### Production Mode (`just prod`) + +```bash +just prod +``` + +- **Compose file**: `docker-compose.full.yml` +- **All services**: Run as Docker containers +- **Nginx**: Unified entry point on port 80 +- **Security**: Disabled (simple deployment) + +**Access (all via port 80):** +- Frontend: http://localhost/ +- Backend API: http://localhost/api/ +- Analytics: http://localhost/analytics/ + +**Nginx Routing (nginx.full.conf):** + +| Path | Target Container | Port | +|------|------------------|------| +| `/analytics/*` | opensearch-dashboards | 5601 | +| `/api/*` | backend | 3211 | +| `/*` | frontend | 8080 | + +> **Note:** Frontend and backend containers only expose ports internally. All external traffic flows through nginx on port 80. 
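+
+As a quick smoke test once the stack is up, you can verify the routing from the host with plain `curl`. This is an optional sketch that assumes the default `80:80` mapping and that `curl` is available locally; the `/health` endpoint is the one defined in `nginx.full.conf`, and `/analytics/` will typically answer with a redirect or auth challenge because it sits behind the auth guard.
+
+```bash
+# nginx health endpoint (defined in nginx.full.conf)
+curl -s http://localhost/health
+
+# Status code for each routed prefix; exact codes depend on backend routes and auth
+for path in / /api/ /analytics/; do
+  printf '%-12s -> ' "$path"
+  curl -s -o /dev/null -w '%{http_code}\n' "http://localhost${path}"
+done
+```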
+ +### Production Secure Mode (`just prod-secure`) + +```bash +just generate-certs +export OPENSEARCH_ADMIN_PASSWORD='secure-password' +export OPENSEARCH_DASHBOARDS_PASSWORD='secure-password' +just prod-secure +``` + +- **Compose files**: `docker-compose.infra.yml` + `docker-compose.prod.yml` (overlay) +- **Security**: TLS enabled, authentication required +- **Multitenancy**: Strict SaaS isolation per customer +- **Nginx**: Uses `nginx.prod.conf` with container networking + +**Access:** +- Analytics: https://localhost/analytics (auth required) +- OpenSearch: https://localhost:9200 (TLS) + +## Nginx Configuration + +| File | Target Services | Use Case | +|------|-----------------|----------| +| `nginx/nginx.dev.conf` | `host.docker.internal:5173/3211` | Dev (PM2 on host) | +| `nginx/nginx.full.conf` | `frontend:8080`, `backend:3211`, `opensearch-dashboards:5601` | Full stack (all containerized) | +| `nginx/nginx.prod.conf` | Same as full + TLS | Prod with security | + +### Routing Architecture + +All modes use nginx as a reverse proxy with unified routing: + +``` +┌─────────────────────────────────────────────────┐ +│ Nginx (port 80/443) │ +├─────────────────────────────────────────────────┤ +│ /analytics/* → OpenSearch Dashboards:5601 │ +│ /api/* → Backend:3211 │ +│ /* → Frontend:8080 │ +└─────────────────────────────────────────────────┘ +``` + +### OpenSearch Dashboards BasePath + +OpenSearch Dashboards is configured with `server.basePath: "/analytics"` to work behind nginx: +- Incoming requests: `/analytics/app/discover` → internally processed as `/app/discover` +- Outgoing URLs: Automatically prefixed with `/analytics` + +## Analytics Pipeline + +The worker service writes analytics data to OpenSearch via the Analytics Sink component. + +**Required Environment Variable:** +```yaml +OPENSEARCH_URL=http://opensearch:9200 +``` + +This is pre-configured in `docker-compose.full.yml`. For detailed analytics documentation, see [docs/analytics.md](../docs/analytics.md). 
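+
+As a rough end-to-end check (assuming the default `9200:9200` port mapping from the compose files), you can query OpenSearch directly from the host. The `security-findings-*` pattern below is the index pattern registered by `opensearch-init.sh`; matching indices only appear after a workflow has actually written analytics data, and the concrete index names produced by the Analytics Sink may differ.
+
+```bash
+# Cluster health should report green or yellow once OpenSearch is up
+curl -s "http://localhost:9200/_cluster/health?pretty"
+
+# List analytics indices written so far (empty until an Analytics Sink step has run)
+curl -s "http://localhost:9200/_cat/indices/security-findings-*?v"
+```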
+ +## Directory Structure + +``` +docker/ +├── docker-compose.infra.yml # Infrastructure (dev base) +├── docker-compose.full.yml # Full stack containerized +├── docker-compose.prod.yml # Security overlay for prod +├── nginx/ +│ ├── nginx.dev.conf # Routes to host (PM2) +│ └── nginx.prod.conf # Routes to containers +├── opensearch-dashboards.yml # Dev dashboards config +├── opensearch-dashboards.prod.yml # Prod dashboards config +├── opensearch-security/ # Security plugin configs +│ ├── internal_users.yml +│ ├── roles.yml +│ ├── roles_mapping.yml +│ └── tenants.yml +├── scripts/ +│ └── generate-certs.sh # TLS certificate generator +├── certs/ # Generated certs (gitignored) +├── PRODUCTION.md # Production deployment guide +└── README.md # This file +``` + +## Quick Reference + +| Command | Description | +|---------|-------------| +| `just dev` | Start dev environment (PM2 + Docker infra) | +| `just dev stop` | Stop dev environment | +| `just prod` | Start full stack in Docker | +| `just prod stop` | Stop production | +| `just prod-secure` | Start with security & multitenancy | +| `just generate-certs` | Generate TLS certificates | +| `just infra up` | Start infrastructure only | +| `just help` | Show all available commands | + +## See Also + +- [PRODUCTION.md](PRODUCTION.md) - Detailed production deployment and customer provisioning guide +- [docs/analytics.md](../docs/analytics.md) - Analytics pipeline and OpenSearch configuration diff --git a/docker/certs/.gitignore b/docker/certs/.gitignore new file mode 100644 index 00000000..5ae618b7 --- /dev/null +++ b/docker/certs/.gitignore @@ -0,0 +1,7 @@ +# Ignore generated certificates and private keys +# These should NEVER be committed to version control +*.pem +*.key +*.crt +*.csr +*.srl diff --git a/docker/docker-compose.full.yml b/docker/docker-compose.full.yml index c54b3d3b..5fe42d88 100644 --- a/docker/docker-compose.full.yml +++ b/docker/docker-compose.full.yml @@ -40,7 +40,7 @@ services: - temporal_data:/var/lib/temporal restart: unless-stopped healthcheck: - test: ["CMD", "tctl", "--address", "localhost:7233", "cluster", "health"] + test: ["CMD-SHELL", "tctl --address $(hostname -i):7233 cluster health"] interval: 30s timeout: 10s retries: 5 @@ -57,7 +57,7 @@ services: - temporal restart: unless-stopped healthcheck: - test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080"] + test: ["CMD", "curl", "-sf", "http://localhost:8080"] interval: 30s timeout: 10s retries: 5 @@ -149,6 +149,65 @@ services: - ./redpanda-console-config.yaml:/etc/redpanda/console-config.yaml:ro restart: unless-stopped + opensearch: + image: opensearchproject/opensearch:2.11.1 + container_name: shipsec-opensearch + environment: + - discovery.type=single-node + - bootstrap.memory_lock=true + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + - DISABLE_SECURITY_PLUGIN=true + - DISABLE_INSTALL_DEMO_CONFIG=true + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + ports: + - "9200:9200" + - "9600:9600" + volumes: + - opensearch_data:/usr/share/opensearch/data + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:9200/_cluster/health || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:2.11.1 + container_name: shipsec-opensearch-dashboards + depends_on: + opensearch: + condition: service_healthy + environment: + - OPENSEARCH_HOSTS=["http://opensearch:9200"] + - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true + 
expose: + - "5601" + volumes: + - ./opensearch-dashboards.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml:ro + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5601/analytics/api/status || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + + opensearch-init: + image: curlimages/curl:8.5.0 + container_name: shipsec-opensearch-init + depends_on: + opensearch-dashboards: + condition: service_healthy + volumes: + - ./opensearch-init.sh:/init.sh:ro + entrypoint: ["/bin/sh", "/init.sh"] + restart: "no" + # Applications dind: image: docker:27-dind @@ -200,8 +259,9 @@ services: - CLERK_SECRET_KEY= # Set to 'true' to disable analytics - DISABLE_ANALYTICS=${DISABLE_ANALYTICS:-false} - ports: - - "3211:3211" + # Internal only - accessed via nginx at /api/ + expose: + - "3211" depends_on: postgres: condition: service_healthy @@ -220,28 +280,31 @@ services: dockerfile: Dockerfile target: frontend args: - VITE_API_URL: ${VITE_API_URL:-http://localhost:3211} - VITE_BACKEND_URL: ${VITE_BACKEND_URL:-http://localhost:3211} + VITE_API_URL: ${VITE_API_URL:-http://localhost} + VITE_BACKEND_URL: ${VITE_BACKEND_URL:-http://localhost} VITE_AUTH_PROVIDER: ${VITE_AUTH_PROVIDER:-local} VITE_DEFAULT_ORG_ID: ${VITE_DEFAULT_ORG_ID:-local-dev} VITE_CLERK_PUBLISHABLE_KEY: ${VITE_CLERK_PUBLISHABLE_KEY:-} VITE_GIT_SHA: ${GIT_SHA:-unknown} VITE_PUBLIC_POSTHOG_KEY: ${VITE_PUBLIC_POSTHOG_KEY:-} VITE_PUBLIC_POSTHOG_HOST: ${VITE_PUBLIC_POSTHOG_HOST:-} + VITE_OPENSEARCH_DASHBOARDS_URL: ${VITE_OPENSEARCH_DASHBOARDS_URL:-/analytics} container_name: shipsec-frontend environment: - - VITE_API_URL=http://localhost:3211 - - VITE_BACKEND_URL=http://localhost:3211 + - VITE_API_URL=${VITE_API_URL:-http://localhost} + - VITE_BACKEND_URL=${VITE_BACKEND_URL:-http://localhost} - VITE_AUTH_PROVIDER=clerk - VITE_DEFAULT_ORG_ID=local-dev - VITE_CLERK_PUBLISHABLE_KEY= - ports: - - "8090:8080" + - VITE_OPENSEARCH_DASHBOARDS_URL=${VITE_OPENSEARCH_DASHBOARDS_URL:-/analytics} + # Internal only - accessed via nginx at / + expose: + - "8080" depends_on: - backend restart: unless-stopped healthcheck: - test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080"] + test: ["CMD", "curl", "-sf", "http://localhost:8080"] interval: 30s timeout: 10s retries: 5 @@ -273,6 +336,8 @@ services: - LOG_KAFKA_CLIENT_ID=shipsec-worker - EVENT_KAFKA_TOPIC=telemetry.events - EVENT_KAFKA_CLIENT_ID=shipsec-worker-events + # OpenSearch for Analytics Sink + - OPENSEARCH_URL=http://opensearch:9200 depends_on: postgres: condition: service_healthy @@ -286,6 +351,8 @@ services: condition: service_healthy redpanda: condition: service_healthy + opensearch: + condition: service_healthy restart: unless-stopped healthcheck: test: ["CMD", "node", "-e", "process.exit(0)"] @@ -293,6 +360,28 @@ services: timeout: 10s retries: 5 + # Nginx reverse proxy - unified entry point + nginx: + image: nginx:1.25-alpine + container_name: shipsec-nginx + depends_on: + frontend: + condition: service_healthy + backend: + condition: service_started + opensearch-dashboards: + condition: service_healthy + ports: + - "80:80" + volumes: + - ./nginx/nginx.full.conf:/etc/nginx/nginx.conf:ro + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + volumes: postgres_data: minio_data: @@ -301,6 +390,7 @@ volumes: docker_data: redis_data: redpanda_data: + opensearch_data: networks: default: diff --git a/docker/docker-compose.infra.yml 
b/docker/docker-compose.infra.yml index b3e60115..3a7807ef 100644 --- a/docker/docker-compose.infra.yml +++ b/docker/docker-compose.infra.yml @@ -138,6 +138,89 @@ services: - ./redpanda-console-config.yaml:/etc/redpanda/console-config.yaml:ro restart: unless-stopped + opensearch: + image: opensearchproject/opensearch:2.11.1 + container_name: shipsec-opensearch + environment: + - discovery.type=single-node + - bootstrap.memory_lock=true + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + - DISABLE_SECURITY_PLUGIN=true + - DISABLE_INSTALL_DEMO_CONFIG=true + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + ports: + - "9200:9200" + - "9600:9600" + volumes: + - opensearch_data:/usr/share/opensearch/data + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:9200/_cluster/health || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:2.11.1 + container_name: shipsec-opensearch-dashboards + depends_on: + opensearch: + condition: service_healthy + environment: + - OPENSEARCH_HOSTS=["http://opensearch:9200"] + - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true + # Port exposed for development access + # Production uses nginx reverse proxy at /analytics + ports: + - "5601:5601" + volumes: + - ./opensearch-dashboards.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml:ro + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5601/analytics/api/status || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + + # Initialize OpenSearch Dashboards with default index patterns + opensearch-init: + image: curlimages/curl:8.5.0 + container_name: shipsec-opensearch-init + depends_on: + opensearch-dashboards: + condition: service_healthy + volumes: + - ./opensearch-init.sh:/init.sh:ro + entrypoint: ["/bin/sh", "/init.sh"] + restart: "no" + + # Nginx reverse proxy - unified entry point + # DEV MODE: Uses nginx.dev.conf which points to host.docker.internal for PM2 services + nginx: + image: nginx:1.25-alpine + container_name: shipsec-nginx + depends_on: + opensearch-dashboards: + condition: service_healthy + ports: + - "80:80" + volumes: + - ./nginx/nginx.dev.conf:/etc/nginx/nginx.conf:ro + extra_hosts: + - "host.docker.internal:host-gateway" + restart: unless-stopped + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + volumes: postgres_data: minio_data: @@ -145,6 +228,7 @@ volumes: temporal_data: redis_data: redpanda_data: + opensearch_data: networks: default: diff --git a/docker/docker-compose.prod.yml b/docker/docker-compose.prod.yml new file mode 100644 index 00000000..cca75fcb --- /dev/null +++ b/docker/docker-compose.prod.yml @@ -0,0 +1,89 @@ +# Production Docker Compose - OpenSearch with Security & Multitenancy +# +# Usage: +# docker compose -f docker-compose.infra.yml -f docker-compose.prod.yml up -d +# +# Prerequisites: +# 1. Generate TLS certificates: ./scripts/generate-certs.sh +# 2. 
Set environment variables in .env.prod or export them: +# - OPENSEARCH_ADMIN_PASSWORD (required) +# - OPENSEARCH_DASHBOARDS_PASSWORD (required) +# +# This file overrides the development infrastructure with: +# - Security plugin enabled +# - TLS encryption for transport and HTTP +# - Multitenancy support in OpenSearch Dashboards + +services: + opensearch: + environment: + # Remove security disable flags (override dev settings) + - DISABLE_SECURITY_PLUGIN=false + - DISABLE_INSTALL_DEMO_CONFIG=false + # Security configuration + - plugins.security.ssl.transport.pemcert_filepath=config/certs/node.pem + - plugins.security.ssl.transport.pemkey_filepath=config/certs/node-key.pem + - plugins.security.ssl.transport.pemtrustedcas_filepath=config/certs/root-ca.pem + - plugins.security.ssl.transport.enforce_hostname_verification=false + - plugins.security.ssl.http.enabled=true + - plugins.security.ssl.http.pemcert_filepath=config/certs/node.pem + - plugins.security.ssl.http.pemkey_filepath=config/certs/node-key.pem + - plugins.security.ssl.http.pemtrustedcas_filepath=config/certs/root-ca.pem + - plugins.security.allow_unsafe_democertificates=false + - plugins.security.allow_default_init_securityindex=true + - plugins.security.authcz.admin_dn=CN=admin,OU=ShipSec,O=ShipSecAI,L=SF,ST=CA,C=US + - plugins.security.audit.type=internal_opensearch + - plugins.security.enable_snapshot_restore_privilege=true + - plugins.security.check_snapshot_restore_write_privileges=true + - plugins.security.restapi.roles_enabled=["all_access", "security_rest_api_access"] + - cluster.name=shipsec-prod + - node.name=opensearch-node1 + volumes: + - opensearch_data:/usr/share/opensearch/data + - ./certs:/usr/share/opensearch/config/certs:ro + - ./opensearch-security:/usr/share/opensearch/config/opensearch-security:ro + healthcheck: + test: ["CMD-SHELL", "curl -sf --cacert /usr/share/opensearch/config/certs/root-ca.pem https://localhost:9200/_cluster/health || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + + opensearch-dashboards: + environment: + # Remove security disable flag (override dev settings) + - DISABLE_SECURITY_DASHBOARDS_PLUGIN=false + - OPENSEARCH_HOSTS=["https://opensearch:9200"] + volumes: + - ./opensearch-dashboards.prod.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml:ro + - ./certs:/usr/share/opensearch-dashboards/config/certs:ro + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:5601/analytics/api/status || exit 1"] + interval: 30s + timeout: 10s + retries: 10 + start_period: 60s + + # Override init script to work with secured cluster + opensearch-init: + environment: + - OPENSEARCH_SECURITY_ENABLED=true + - OPENSEARCH_CA_CERT=/certs/root-ca.pem + volumes: + - ./opensearch-init.sh:/init.sh:ro + - ./certs:/certs:ro + + # Nginx with production config (container service names) + nginx: + volumes: + - ./nginx/nginx.prod.conf:/etc/nginx/nginx.conf:ro + - ./certs:/etc/nginx/certs:ro + ports: + - "80:80" + - "443:443" + +volumes: + opensearch_data: + +networks: + default: + name: shipsec-network diff --git a/docker/nginx/nginx.dev.conf b/docker/nginx/nginx.dev.conf new file mode 100644 index 00000000..8f4b1f3d --- /dev/null +++ b/docker/nginx/nginx.dev.conf @@ -0,0 +1,189 @@ +worker_processes auto; +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status 
$body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + + # Gzip compression + gzip on; + gzip_vary on; + gzip_proxied any; + gzip_comp_level 6; + gzip_types text/plain text/css text/xml application/json application/javascript + application/rss+xml application/atom+xml image/svg+xml; + + # ================================================================= + # DEVELOPMENT MODE - Frontend & Backend run on host via PM2 + # Uses host.docker.internal to reach host machine from container + # ================================================================= + + # Upstream definitions - pointing to host machine (PM2 services) + upstream frontend { + # Vite dev server on host + server host.docker.internal:5173; + keepalive 32; + } + + upstream backend { + # NestJS backend on host + server host.docker.internal:3211; + keepalive 32; + } + + # OpenSearch Dashboards runs in Docker + upstream opensearch-dashboards { + server opensearch-dashboards:5601; + keepalive 32; + } + + # WebSocket connection upgrade map + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + server { + listen 80; + server_name _; + + # Client request body size (for file uploads) + client_max_body_size 100M; + client_body_buffer_size 10M; + + # Proxy buffer settings + proxy_buffer_size 128k; + proxy_buffers 4 256k; + proxy_busy_buffers_size 256k; + + # Common proxy headers + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + + # ================================================================= + # Auth validation endpoint (public, proxied to backend) + # ================================================================= + location = /auth/validate { + proxy_pass http://backend/api/v1/auth/validate; + proxy_set_header Cookie $http_cookie; + proxy_set_header Authorization $http_authorization; + } + + # ================================================================= + # Internal auth validation endpoint for auth_request + # ================================================================= + location = /_auth { + internal; + proxy_pass http://backend/api/v1/auth/validate; + proxy_pass_request_body off; + proxy_set_header Content-Length ""; + proxy_set_header X-Original-URI $request_uri; + # Pass cookies for session auth + proxy_set_header Cookie $http_cookie; + # Pass Authorization header for API key/token auth + proxy_set_header Authorization $http_authorization; + } + + # ================================================================= + # OpenSearch Dashboards - /analytics/* (PROTECTED) + # ================================================================= + location /analytics/ { + # Require authentication before proxying + auth_request /_auth; + # On auth failure, redirect to login page + error_page 401 = @auth_redirect; + + proxy_pass http://opensearch-dashboards; + + # WebSocket support for dashboards real-time features + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # Timeouts for dashboards (can be slow for large queries) + proxy_connect_timeout 60s; + proxy_send_timeout 120s; + proxy_read_timeout 120s; + + # Dashboards-specific headers + proxy_set_header osd-xsrf "true"; + + # Preserve cookies + 
proxy_cookie_path /analytics/ /analytics/; + + # No redirect rewriting needed - we preserve the path + proxy_redirect off; + } + + # Auth redirect handler - redirect to home with return URL + location @auth_redirect { + return 302 /?returnTo=$request_uri; + } + + # Exact match for /analytics without trailing slash + location = /analytics { + return 301 /analytics/; + } + + # ================================================================= + # Backend API - /api/* + # ================================================================= + location /api/ { + proxy_pass http://backend/api/; + + # WebSocket support for terminal/streaming endpoints + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # API timeouts + proxy_connect_timeout 30s; + proxy_send_timeout 60s; + proxy_read_timeout 60s; + + # Don't buffer API responses (important for streaming) + proxy_buffering off; + } + + # ================================================================= + # Frontend (SPA) - /* (catch-all) + # ================================================================= + location / { + proxy_pass http://frontend/; + + # WebSocket support for Vite HMR in development + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # Frontend timeouts - longer read timeout for HMR WebSocket + proxy_connect_timeout 30s; + proxy_send_timeout 30s; + proxy_read_timeout 86400s; # 24 hours - keep HMR WebSocket alive + } + + # Health check endpoint + location /health { + access_log off; + return 200 "healthy\n"; + add_header Content-Type text/plain; + } + } +} diff --git a/docker/nginx/nginx.full.conf b/docker/nginx/nginx.full.conf new file mode 100644 index 00000000..f8805296 --- /dev/null +++ b/docker/nginx/nginx.full.conf @@ -0,0 +1,185 @@ +worker_processes auto; +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + + # Gzip compression + gzip on; + gzip_vary on; + gzip_proxied any; + gzip_comp_level 6; + gzip_types text/plain text/css text/xml application/json application/javascript + application/rss+xml application/atom+xml image/svg+xml; + + # ================================================================= + # FULL DOCKER MODE - All services run in Docker containers + # ================================================================= + + # Upstream definitions - Docker container names + upstream frontend { + server frontend:8080; + keepalive 32; + } + + upstream backend { + server backend:3211; + keepalive 32; + } + + upstream opensearch-dashboards { + server opensearch-dashboards:5601; + keepalive 32; + } + + # WebSocket connection upgrade map + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + server { + listen 80; + server_name _; + + # Client request body size (for file uploads) + client_max_body_size 100M; + client_body_buffer_size 10M; + + # Proxy buffer settings + proxy_buffer_size 128k; + proxy_buffers 4 256k; + proxy_busy_buffers_size 256k; + + # Common proxy headers + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + 
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + + # ================================================================= + # Auth validation endpoint (public, proxied to backend) + # ================================================================= + location = /auth/validate { + proxy_pass http://backend/api/v1/auth/validate; + proxy_set_header Cookie $http_cookie; + proxy_set_header Authorization $http_authorization; + } + + # ================================================================= + # Internal auth validation endpoint for auth_request + # ================================================================= + location = /_auth { + internal; + proxy_pass http://backend/api/v1/auth/validate; + proxy_pass_request_body off; + proxy_set_header Content-Length ""; + proxy_set_header X-Original-URI $request_uri; + # Pass cookies for session auth + proxy_set_header Cookie $http_cookie; + # Pass Authorization header for API key/token auth + proxy_set_header Authorization $http_authorization; + } + + # ================================================================= + # OpenSearch Dashboards - /analytics/* (PROTECTED) + # ================================================================= + location /analytics/ { + # Require authentication before proxying + auth_request /_auth; + # On auth failure, redirect to login page + error_page 401 = @auth_redirect; + + proxy_pass http://opensearch-dashboards; + + # WebSocket support for dashboards real-time features + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # Timeouts for dashboards (can be slow for large queries) + proxy_connect_timeout 60s; + proxy_send_timeout 120s; + proxy_read_timeout 120s; + + # Dashboards-specific headers + proxy_set_header osd-xsrf "true"; + + # Preserve cookies + proxy_cookie_path /analytics/ /analytics/; + + # No redirect rewriting needed - we preserve the path + proxy_redirect off; + } + + # Auth redirect handler - redirect to home with return URL + location @auth_redirect { + return 302 /?returnTo=$request_uri; + } + + # Exact match for /analytics without trailing slash + location = /analytics { + return 301 /analytics/; + } + + # ================================================================= + # Backend API - /api/* + # ================================================================= + location /api/ { + proxy_pass http://backend/api/; + + # WebSocket support for terminal/streaming endpoints + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # API timeouts + proxy_connect_timeout 30s; + proxy_send_timeout 60s; + proxy_read_timeout 60s; + + # Don't buffer API responses (important for streaming) + proxy_buffering off; + } + + # ================================================================= + # Frontend (SPA) - /* (catch-all) + # ================================================================= + location / { + proxy_pass http://frontend/; + + # WebSocket support for HMR (if running dev build) + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # Frontend timeouts + proxy_connect_timeout 30s; + proxy_send_timeout 30s; + proxy_read_timeout 30s; + } + + # Health check endpoint + location /health { + access_log off; + return 200 "healthy\n"; + add_header Content-Type text/plain; + } + } +} diff --git a/docker/nginx/nginx.prod.conf b/docker/nginx/nginx.prod.conf new file mode 
100644 index 00000000..36ea580c --- /dev/null +++ b/docker/nginx/nginx.prod.conf @@ -0,0 +1,182 @@ +worker_processes auto; +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + + # Gzip compression + gzip on; + gzip_vary on; + gzip_proxied any; + gzip_comp_level 6; + gzip_types text/plain text/css text/xml application/json application/javascript + application/rss+xml application/atom+xml image/svg+xml; + + # Upstream definitions + upstream frontend { + server frontend:8080; + keepalive 32; + } + + upstream backend { + server backend:3211; + keepalive 32; + } + + upstream opensearch-dashboards { + server opensearch-dashboards:5601; + keepalive 32; + } + + # WebSocket connection upgrade map + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + server { + listen 80; + server_name _; + + # Client request body size (for file uploads) + client_max_body_size 100M; + client_body_buffer_size 10M; + + # Proxy buffer settings + proxy_buffer_size 128k; + proxy_buffers 4 256k; + proxy_busy_buffers_size 256k; + + # Common proxy headers + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + + # ================================================================= + # Auth validation endpoint (public, proxied to backend) + # ================================================================= + location = /auth/validate { + proxy_pass http://backend/api/v1/auth/validate; + proxy_set_header Cookie $http_cookie; + proxy_set_header Authorization $http_authorization; + } + + # ================================================================= + # Internal auth validation endpoint for auth_request + # ================================================================= + location = /_auth { + internal; + proxy_pass http://backend/api/v1/auth/validate; + proxy_pass_request_body off; + proxy_set_header Content-Length ""; + proxy_set_header X-Original-URI $request_uri; + # Pass cookies for session auth + proxy_set_header Cookie $http_cookie; + # Pass Authorization header for API key/token auth + proxy_set_header Authorization $http_authorization; + } + + # ================================================================= + # OpenSearch Dashboards - /analytics/* (PROTECTED) + # ================================================================= + location /analytics/ { + # Require authentication before proxying + auth_request /_auth; + # On auth failure, redirect to login page + error_page 401 = @auth_redirect; + + proxy_pass http://opensearch-dashboards/; + + # WebSocket support for dashboards real-time features + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # Timeouts for dashboards (can be slow for large queries) + proxy_connect_timeout 60s; + proxy_send_timeout 120s; + proxy_read_timeout 120s; + + # Dashboards-specific headers + proxy_set_header osd-xsrf "true"; + + # Preserve cookies + proxy_cookie_path / /analytics/; + + # 
Handle redirects from dashboards + proxy_redirect / /analytics/; + proxy_redirect http://opensearch-dashboards:5601/ /analytics/; + } + + # Auth redirect handler - redirect to home with return URL + location @auth_redirect { + return 302 /?returnTo=$request_uri; + } + + # Exact match for /analytics without trailing slash + location = /analytics { + return 301 /analytics/; + } + + # ================================================================= + # Backend API - /api/* + # ================================================================= + location /api/ { + proxy_pass http://backend/api/; + + # WebSocket support for terminal/streaming endpoints + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # API timeouts + proxy_connect_timeout 30s; + proxy_send_timeout 60s; + proxy_read_timeout 60s; + + # Don't buffer API responses (important for streaming) + proxy_buffering off; + } + + # ================================================================= + # Frontend (SPA) - /* (catch-all) + # ================================================================= + location / { + proxy_pass http://frontend/; + + # WebSocket support for Vite HMR in development + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + + # Frontend timeouts + proxy_connect_timeout 30s; + proxy_send_timeout 30s; + proxy_read_timeout 30s; + } + + # Health check endpoint + location /health { + access_log off; + return 200 "healthy\n"; + add_header Content-Type text/plain; + } + } +} diff --git a/docker/opensearch-dashboards.prod.yml b/docker/opensearch-dashboards.prod.yml new file mode 100644 index 00000000..c53263f3 --- /dev/null +++ b/docker/opensearch-dashboards.prod.yml @@ -0,0 +1,57 @@ +# OpenSearch Dashboards Production Configuration +# Mount this file to /usr/share/opensearch-dashboards/config/opensearch_dashboards.yml +# +# This configuration enables: +# - Security plugin with authentication +# - Multitenancy for tenant isolation +# - TLS for secure communication with OpenSearch + +server.host: "0.0.0.0" +server.port: 5601 + +# Base path configuration for reverse proxy +server.basePath: "/analytics" +server.rewriteBasePath: true + +# OpenSearch connection (HTTPS for production) +opensearch.hosts: ["https://opensearch:9200"] + +# TLS Configuration - trust the CA certificate +opensearch.ssl.verificationMode: certificate +opensearch.ssl.certificateAuthorities: ["/usr/share/opensearch-dashboards/config/certs/root-ca.pem"] + +# Authentication - use OpenSearch Security plugin +opensearch.username: "kibanaserver" +opensearch.password: "${OPENSEARCH_DASHBOARDS_PASSWORD}" +opensearch.requestHeadersWhitelist: ["securitytenant", "Authorization"] + +# Security Plugin Configuration - SaaS Multitenancy +# Each customer gets their own isolated tenant - no shared data by default +opensearch_security.multitenancy.enabled: true +opensearch_security.multitenancy.tenants.enable_global: false +opensearch_security.multitenancy.tenants.enable_private: true +opensearch_security.multitenancy.tenants.preferred: ["Private"] +opensearch_security.readonly_mode.roles: ["kibana_read_only"] +opensearch_security.cookie.secure: true +opensearch_security.cookie.isSameSite: "Strict" + +# Tenant isolation - users only see their tenant's dashboards +# Backend creates tenant dynamically per customer (tenant name = customer ID) + +# Session configuration +opensearch_security.session.ttl: 3600000 +opensearch_security.session.keepalive: true + +# Logging +logging.dest: stdout 
+logging.silent: false +logging.quiet: false +logging.verbose: false + +# Telemetry (disable for production privacy) +telemetry.enabled: false +telemetry.allowChangingOptInStatus: false + +# CSP headers for security +csp.strict: true +csp.warnLegacyBrowsers: true diff --git a/docker/opensearch-dashboards.yml b/docker/opensearch-dashboards.yml new file mode 100644 index 00000000..cc9dbc6a --- /dev/null +++ b/docker/opensearch-dashboards.yml @@ -0,0 +1,30 @@ +# OpenSearch Dashboards configuration +# Mount this file to /usr/share/opensearch-dashboards/config/opensearch_dashboards.yml +# +# SECURITY NOTE: +# - Local development: Security plugin is disabled (DISABLE_SECURITY_DASHBOARDS_PLUGIN=true in docker-compose) +# - Production: Enable security plugin and configure multitenancy: +# 1. Remove DISABLE_SECURITY_PLUGIN=true from OpenSearch +# 2. Remove DISABLE_SECURITY_DASHBOARDS_PLUGIN=true from Dashboards +# 3. Configure TLS certificates and authentication +# 4. Add: opensearch_security.multitenancy.enabled: true +# 5. Add: opensearch_security.multitenancy.tenants.preferred: ["Private", "Global"] + +server.host: "0.0.0.0" +server.port: 5601 + +# Base path configuration for reverse proxy +server.basePath: "/analytics" +server.rewriteBasePath: true + +# OpenSearch connection +opensearch.hosts: ["http://opensearch:9200"] + +# Logging +logging.dest: stdout +logging.silent: false +logging.quiet: false +logging.verbose: false + +# CSP - relaxed for development (inline scripts needed by dashboards) +csp.strict: false diff --git a/docker/opensearch-init.sh b/docker/opensearch-init.sh new file mode 100755 index 00000000..9ff64282 --- /dev/null +++ b/docker/opensearch-init.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# OpenSearch Dashboards initialization script +# Creates default index patterns and saved objects + +set -e + +# Note: Use /analytics prefix since dashboards is configured with server.basePath=/analytics +DASHBOARDS_URL="${OPENSEARCH_DASHBOARDS_URL:-http://opensearch-dashboards:5601}" +DASHBOARDS_BASE_PATH="/analytics" +MAX_RETRIES=30 +RETRY_INTERVAL=5 + +echo "[opensearch-init] Waiting for OpenSearch Dashboards to be ready..." + +# Wait for Dashboards to be healthy (use basePath) +for i in $(seq 1 $MAX_RETRIES); do + if curl -sf "${DASHBOARDS_URL}${DASHBOARDS_BASE_PATH}/api/status" > /dev/null 2>&1; then + echo "[opensearch-init] OpenSearch Dashboards is ready!" + break + fi + + if [ $i -eq $MAX_RETRIES ]; then + echo "[opensearch-init] ERROR: OpenSearch Dashboards not ready after $((MAX_RETRIES * RETRY_INTERVAL)) seconds" + exit 1 + fi + + echo "[opensearch-init] Waiting for Dashboards... (attempt $i/$MAX_RETRIES)" + sleep $RETRY_INTERVAL +done + +# Check if index pattern already exists +echo "[opensearch-init] Checking for existing index patterns..." +EXISTING=$(curl -sf "${DASHBOARDS_URL}${DASHBOARDS_BASE_PATH}/api/saved_objects/_find?type=index-pattern&search_fields=title&search=security-findings-*" \ + -H "osd-xsrf: true" 2>/dev/null || echo '{"total":0}') + +TOTAL=$(echo "$EXISTING" | grep -o '"total":[0-9]*' | grep -o '[0-9]*' || echo "0") + +if [ "$TOTAL" -gt 0 ]; then + echo "[opensearch-init] Index pattern 'security-findings-*' already exists, skipping creation" +else + echo "[opensearch-init] Creating index pattern 'security-findings-*'..." 
+ + # Use specific ID so dashboards can reference it consistently + RESPONSE=$(curl -sf -X POST "${DASHBOARDS_URL}${DASHBOARDS_BASE_PATH}/api/saved_objects/index-pattern/security-findings-*" \ + -H "Content-Type: application/json" \ + -H "osd-xsrf: true" \ + -d '{ + "attributes": { + "title": "security-findings-*", + "timeFieldName": "@timestamp" + } + }' 2>&1) + + if echo "$RESPONSE" | grep -q '"type":"index-pattern"'; then + echo "[opensearch-init] Successfully created index pattern 'security-findings-*'" + else + echo "[opensearch-init] WARNING: Failed to create index pattern. Response: $RESPONSE" + # Don't fail - the pattern might be created later when data exists + fi +fi + +# Set as default index pattern (optional, helps UX) +echo "[opensearch-init] Setting default index pattern..." +curl -sf -X POST "${DASHBOARDS_URL}${DASHBOARDS_BASE_PATH}/api/opensearch-dashboards/settings" \ + -H "Content-Type: application/json" \ + -H "osd-xsrf: true" \ + -d '{"changes":{"defaultIndex":"security-findings-*"}}' > /dev/null 2>&1 || true + +echo "[opensearch-init] Initialization complete!" diff --git a/docker/opensearch-security/internal_users.yml b/docker/opensearch-security/internal_users.yml new file mode 100644 index 00000000..313b7d50 --- /dev/null +++ b/docker/opensearch-security/internal_users.yml @@ -0,0 +1,61 @@ +# OpenSearch Security - Internal Users (SaaS Model) +# +# USER PROVISIONING STRATEGY: +# Customer users are created dynamically via the Security REST API +# when users are added to the platform. This file only contains +# system users required for platform operations. +# +# Customer user creation example (via backend): +# PUT /_plugins/_security/api/internalusers/{user_email} +# { +# "password": "hashed_password", +# "backend_roles": ["customer_{customer_id}"], +# "attributes": { +# "customer_id": "{customer_id}", +# "email": "{user_email}" +# } +# } +# +# Password hashing: +# docker run -it opensearchproject/opensearch:2.11.1 \ +# /usr/share/opensearch/plugins/opensearch-security/tools/hash.sh -p + +--- +_meta: + type: "internalusers" + config_version: 2 + +# ============================================================================= +# SYSTEM USERS (Platform Operations) +# ============================================================================= + +# Platform admin - for internal operations only +admin: + # CHANGE THIS IN PRODUCTION - hash for "admin" + hash: "$2y$12$QJMOhaNM2dVJQGOVIBJOqOHQhQqq2v7rnE3iyNWMWvqjvjnvZe/Aq" + reserved: true + backend_roles: + - "platform_admin" + attributes: + role: "system" + description: "Platform administrator - internal use only" + +# Dashboards server user - used by OpenSearch Dashboards +kibanaserver: + # CHANGE THIS IN PRODUCTION - hash for "kibanaserver" + hash: "$2y$12$r2uo1.C/6oXP1NnMgQzNxO3LnKCJR2I3ymvY9rUYLQq9cYEITCwfO" + reserved: true + attributes: + role: "system" + description: "Dashboards backend communication user" + +# ============================================================================= +# CUSTOMER USERS +# Note: Customer users are created dynamically by the backend when users +# register or are invited to the platform. 
+# +# Each customer user will have: +# - backend_roles: ["customer_{customer_id}"] +# - attributes.customer_id: their customer ID +# - Mapped to customer-specific role for index isolation +# ============================================================================= diff --git a/docker/opensearch-security/roles.yml b/docker/opensearch-security/roles.yml new file mode 100644 index 00000000..5d5b5558 --- /dev/null +++ b/docker/opensearch-security/roles.yml @@ -0,0 +1,140 @@ +# OpenSearch Security - Roles Configuration (SaaS Model) +# +# INDEX ISOLATION STRATEGY: +# Each customer's data is stored in indices prefixed with their customer ID: +# {customer_id}-analytics-* +# {customer_id}-workflows-* +# {customer_id}-scans-* +# +# Roles are created dynamically per customer with index patterns that +# restrict access to only their data. This file defines role templates +# and system roles. +# +# Dynamic role creation example (via backend): +# PUT /_plugins/_security/api/roles/customer_{customer_id} +# { +# "cluster_permissions": ["cluster_composite_ops_ro"], +# "index_permissions": [{ +# "index_patterns": ["{customer_id}-*"], +# "allowed_actions": ["read", "indices:data/read/*"] +# }], +# "tenant_permissions": [{ +# "tenant_patterns": ["{customer_id}"], +# "allowed_actions": ["kibana_all_write"] +# }] +# } + +--- +_meta: + type: "roles" + config_version: 2 + +# ============================================================================= +# SYSTEM ROLES (Platform Operations) +# ============================================================================= + +# Platform admin - full access for operators +platform_admin: + reserved: true + cluster_permissions: + - "*" + index_permissions: + - index_patterns: + - "*" + allowed_actions: + - "*" + tenant_permissions: + - tenant_patterns: + - "__platform_admin" + allowed_actions: + - "kibana_all_write" + +# ============================================================================= +# CUSTOMER ROLE TEMPLATE +# These are templates - actual roles are created dynamically per customer +# ============================================================================= + +# Template: Customer read-write access (for active users) +# Actual role name: customer_{customer_id}_rw +# Index pattern: {customer_id}-* +customer_template_rw: + reserved: false + description: "Template for customer read-write roles - DO NOT USE DIRECTLY" + cluster_permissions: + - "cluster_composite_ops_ro" + - "indices:data/read/scroll*" + index_permissions: + - index_patterns: + - "CUSTOMER_ID_PLACEHOLDER-*" + allowed_actions: + - "read" + - "write" + - "create_index" + - "indices:data/read/*" + - "indices:data/write/*" + - "indices:admin/mapping/put" + tenant_permissions: + - tenant_patterns: + - "CUSTOMER_ID_PLACEHOLDER" + allowed_actions: + - "kibana_all_write" + +# Template: Customer read-only access (for viewers) +# Actual role name: customer_{customer_id}_ro +# Index pattern: {customer_id}-* +customer_template_ro: + reserved: false + description: "Template for customer read-only roles - DO NOT USE DIRECTLY" + cluster_permissions: + - "cluster_composite_ops_ro" + index_permissions: + - index_patterns: + - "CUSTOMER_ID_PLACEHOLDER-*" + allowed_actions: + - "read" + - "indices:data/read/*" + tenant_permissions: + - tenant_patterns: + - "CUSTOMER_ID_PLACEHOLDER" + allowed_actions: + - "kibana_all_read" + +# ============================================================================= +# DASHBOARDS INTERNAL ROLES +# 
============================================================================= + +# Dashboards server role - for backend communication +kibana_server: + reserved: true + cluster_permissions: + - "cluster_monitor" + - "cluster_composite_ops" + - "indices:admin/template/*" + - "indices:data/read/scroll*" + index_permissions: + - index_patterns: + - ".kibana" + - ".kibana_*" + - ".opensearch_dashboards" + - ".opensearch_dashboards_*" + allowed_actions: + - "indices_all" + - index_patterns: + - "*" + allowed_actions: + - "indices:admin/aliases/get" + - "indices:admin/mappings/get" + +# Read-only dashboard user (for embedding/sharing) +kibana_read_only: + reserved: true + cluster_permissions: + - "cluster_composite_ops_ro" + index_permissions: + - index_patterns: + - ".kibana" + - ".kibana_*" + - ".opensearch_dashboards" + - ".opensearch_dashboards_*" + allowed_actions: + - "read" diff --git a/docker/opensearch-security/roles_mapping.yml b/docker/opensearch-security/roles_mapping.yml new file mode 100644 index 00000000..ff4c8065 --- /dev/null +++ b/docker/opensearch-security/roles_mapping.yml @@ -0,0 +1,64 @@ +# OpenSearch Security - Roles Mapping (SaaS Model) +# +# DYNAMIC ROLE MAPPING: +# Customer role mappings are created dynamically when users are provisioned. +# Each customer user is mapped to their customer-specific role. +# +# Example dynamic mapping creation (via backend): +# PUT /_plugins/_security/api/rolesmapping/customer_{customer_id}_rw +# { +# "users": ["user@customer.com"], +# "backend_roles": ["customer_{customer_id}"] +# } +# +# The backend should: +# 1. Create customer tenant when customer onboards +# 2. Create customer role (customer_{id}_rw or customer_{id}_ro) +# 3. Map user to customer role when user is added + +--- +_meta: + type: "rolesmapping" + config_version: 2 + +# ============================================================================= +# SYSTEM ROLE MAPPINGS +# ============================================================================= + +# Platform admin mapping - internal operators only +platform_admin: + reserved: true + users: + - "admin" + backend_roles: + - "platform_admin" + description: "Platform administrators with full system access" + +# Dashboards server mapping +kibana_server: + reserved: true + users: + - "kibanaserver" + description: "OpenSearch Dashboards server user" + +# Security REST API access - for admin operations +security_rest_api_access: + reserved: true + users: + - "admin" + backend_roles: + - "platform_admin" + description: "Access to Security REST API for tenant/role management" + +# ============================================================================= +# CUSTOMER ROLE MAPPINGS +# Note: Customer-specific mappings are created dynamically by the backend +# when customers and users are provisioned. +# +# Pattern for dynamic mappings: +# Role: customer_{customer_id}_rw +# Users: [list of customer's users with write access] +# +# Role: customer_{customer_id}_ro +# Users: [list of customer's users with read-only access] +# ============================================================================= diff --git a/docker/opensearch-security/tenants.yml b/docker/opensearch-security/tenants.yml new file mode 100644 index 00000000..ae9d0393 --- /dev/null +++ b/docker/opensearch-security/tenants.yml @@ -0,0 +1,28 @@ +# OpenSearch Security - Tenants Configuration (SaaS Model) +# +# TENANT ISOLATION STRATEGY: +# Each customer gets their own isolated tenant and index pattern. +# No shared/global dashboards - sharing is explicitly opt-in. 
+# +# Tenants are created dynamically via the Security REST API when +# a new customer is onboarded. Tenant name = customer ID. +# +# Index naming convention: {customer_id}-analytics-* +# Each customer's role restricts access to only their indices. +# +# Example dynamic tenant creation (via backend): +# POST /_plugins/_security/api/tenants/{customer_id} +# { "description": "Tenant for customer {customer_id}" } + +--- +_meta: + type: "tenants" + config_version: 2 + +# NOTE: Customer tenants are created dynamically by the application backend +# when customers are onboarded. This file only contains system tenants. + +# Admin tenant - for platform operators only (not customers) +__platform_admin: + reserved: true + description: "Platform administration - internal use only" diff --git a/docker/scripts/generate-certs.sh b/docker/scripts/generate-certs.sh new file mode 100755 index 00000000..4a04c2fd --- /dev/null +++ b/docker/scripts/generate-certs.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# Generate TLS certificates for OpenSearch production deployment +# +# This script creates: +# - Root CA certificate and key +# - Node certificate for OpenSearch (server) +# - Admin certificate for cluster management +# +# Usage: ./generate-certs.sh [output-dir] +# +# Requirements: openssl + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +OUTPUT_DIR="${1:-$SCRIPT_DIR/../certs}" +DAYS_VALID=365 + +# Certificate Subject fields +COUNTRY="US" +STATE="CA" +LOCALITY="SF" +ORGANIZATION="ShipSecAI" +ORG_UNIT="ShipSec" + +echo "=== OpenSearch Certificate Generator ===" +echo "Output directory: $OUTPUT_DIR" +echo "" + +# Create output directory +mkdir -p "$OUTPUT_DIR" +cd "$OUTPUT_DIR" + +# Check if certificates already exist +if [[ -f "root-ca.pem" ]]; then + echo "WARNING: Certificates already exist in $OUTPUT_DIR" + read -p "Overwrite existing certificates? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Aborted." + exit 1 + fi +fi + +echo "1. Generating Root CA..." +openssl genrsa -out root-ca-key.pem 2048 +openssl req -new -x509 -sha256 -key root-ca-key.pem -out root-ca.pem -days $DAYS_VALID \ + -subj "/C=$COUNTRY/ST=$STATE/L=$LOCALITY/O=$ORGANIZATION/OU=$ORG_UNIT/CN=Root CA" + +echo "2. Generating Admin Certificate..." +openssl genrsa -out admin-key-temp.pem 2048 +openssl pkcs8 -inform PEM -outform PEM -in admin-key-temp.pem -topk8 -nocrypt -out admin-key.pem +openssl req -new -key admin-key.pem -out admin.csr \ + -subj "/C=$COUNTRY/ST=$STATE/L=$LOCALITY/O=$ORGANIZATION/OU=$ORG_UNIT/CN=admin" +openssl x509 -req -in admin.csr -CA root-ca.pem -CAkey root-ca-key.pem -CAcreateserial \ + -sha256 -out admin.pem -days $DAYS_VALID +rm admin-key-temp.pem admin.csr + +echo "3. Generating Node Certificate..." +# Create extension file for SAN (Subject Alternative Names) +cat > node-ext.cnf << EOF +subjectAltName = DNS:localhost, DNS:opensearch, DNS:opensearch-node1, IP:127.0.0.1 +EOF + +openssl genrsa -out node-key-temp.pem 2048 +openssl pkcs8 -inform PEM -outform PEM -in node-key-temp.pem -topk8 -nocrypt -out node-key.pem +openssl req -new -key node-key.pem -out node.csr \ + -subj "/C=$COUNTRY/ST=$STATE/L=$LOCALITY/O=$ORGANIZATION/OU=$ORG_UNIT/CN=opensearch-node1" +openssl x509 -req -in node.csr -CA root-ca.pem -CAkey root-ca-key.pem -CAcreateserial \ + -sha256 -out node.pem -days $DAYS_VALID -extfile node-ext.cnf +rm node-key-temp.pem node.csr node-ext.cnf + +echo "4. Setting permissions..." 
+chmod 600 *-key.pem +chmod 644 *.pem + +echo "" +echo "=== Certificates Generated Successfully ===" +echo "" +echo "Files created in $OUTPUT_DIR:" +ls -la "$OUTPUT_DIR" +echo "" +echo "Next steps:" +echo " 1. Review the certificates" +echo " 2. Set OPENSEARCH_ADMIN_PASSWORD and OPENSEARCH_DASHBOARDS_PASSWORD environment variables" +echo " 3. Run: docker compose -f docker-compose.infra.yml -f docker-compose.prod.yml up -d" +echo "" +echo "For production deployments:" +echo " - Use proper certificate authority (e.g., Let's Encrypt, internal CA)" +echo " - Store private keys securely (e.g., HashiCorp Vault, AWS Secrets Manager)" +echo " - Rotate certificates before expiration ($DAYS_VALID days)" diff --git a/justfile b/justfile index f42c6420..eee7ba41 100644 --- a/justfile +++ b/justfile @@ -111,8 +111,9 @@ prod action="start": docker compose -f docker/docker-compose.full.yml up -d echo "" echo "✅ Production environment ready" - echo " Frontend: http://localhost:8090" - echo " Backend: http://localhost:3211" + echo " App: http://localhost" + echo " API: http://localhost/api" + echo " Analytics: http://localhost/analytics" echo " Temporal UI: http://localhost:8081" echo "" @@ -138,8 +139,9 @@ prod action="start": docker compose -f docker/docker-compose.full.yml up -d --build echo "✅ Production built and started" - echo " Frontend: http://localhost:8090" - echo " Backend: http://localhost:3211" + echo " App: http://localhost" + echo " API: http://localhost/api" + echo " Analytics: http://localhost/analytics" echo "" # Version check @@ -186,8 +188,9 @@ prod action="start": echo "" echo "✅ ShipSec Studio $LATEST_TAG ready" - echo " Frontend: http://localhost:8090" - echo " Backend: http://localhost:3211" + echo " App: http://localhost" + echo " API: http://localhost/api" + echo " Analytics: http://localhost/analytics" echo " Temporal UI: http://localhost:8081" echo "" echo "💡 Note: Using images tagged as $LATEST_TAG" @@ -230,8 +233,9 @@ prod-images action="start": DOCKER_BUILDKIT=1 docker compose -f docker/docker-compose.full.yml up -d echo "" echo "✅ Production environment ready" - echo " Frontend: http://localhost:8090" - echo " Backend: http://localhost:3211" + echo " App: http://localhost" + echo " API: http://localhost/api" + echo " Analytics: http://localhost/analytics" echo " Temporal UI: http://localhost:8081" ;; stop) @@ -286,6 +290,75 @@ prod-images action="start": ;; esac +# === Production Secure (with Security & Multitenancy) === + +# Run production with OpenSearch security and SaaS multitenancy +prod-secure action="start": + #!/usr/bin/env bash + set -euo pipefail + case "{{action}}" in + start) + echo "🔐 Starting secure production environment..." + + # Check for certificates + if [ ! -f "docker/certs/root-ca.pem" ]; then + echo "❌ TLS certificates not found!" + echo "" + echo " Run: just generate-certs" + exit 1 + fi + + # Check for required env vars + if [ -z "${OPENSEARCH_ADMIN_PASSWORD:-}" ] || [ -z "${OPENSEARCH_DASHBOARDS_PASSWORD:-}" ]; then + echo "❌ Required environment variables not set!" 
+ echo "" + echo " export OPENSEARCH_ADMIN_PASSWORD='your-secure-password'" + echo " export OPENSEARCH_DASHBOARDS_PASSWORD='your-secure-password'" + exit 1 + fi + + docker compose -f docker/docker-compose.infra.yml -f docker/docker-compose.prod.yml up -d + echo "" + echo "✅ Secure production environment ready" + echo " Analytics: https://localhost/analytics (requires auth)" + echo " OpenSearch: https://localhost:9200 (TLS enabled)" + echo "" + echo "💡 See docker/PRODUCTION.md for customer provisioning" + ;; + stop) + docker compose -f docker/docker-compose.infra.yml -f docker/docker-compose.prod.yml down + echo "✅ Secure production stopped" + ;; + logs) + docker compose -f docker/docker-compose.infra.yml -f docker/docker-compose.prod.yml logs -f + ;; + status) + docker compose -f docker/docker-compose.infra.yml -f docker/docker-compose.prod.yml ps + ;; + clean) + docker compose -f docker/docker-compose.infra.yml -f docker/docker-compose.prod.yml down -v + echo "✅ Secure production cleaned" + ;; + *) + echo "Usage: just prod-secure [start|stop|logs|status|clean]" + ;; + esac + +# Generate TLS certificates for production +generate-certs: + #!/usr/bin/env bash + set -euo pipefail + echo "🔐 Generating TLS certificates..." + chmod +x docker/scripts/generate-certs.sh + docker/scripts/generate-certs.sh + echo "" + echo "✅ Certificates generated in docker/certs/" + echo "" + echo "Next steps:" + echo " 1. export OPENSEARCH_ADMIN_PASSWORD='your-secure-password'" + echo " 2. export OPENSEARCH_DASHBOARDS_PASSWORD='your-secure-password'" + echo " 3. just prod-secure" + # === Infrastructure Only === # Manage infrastructure containers separately @@ -371,6 +444,13 @@ help: @echo " just prod status Check production status" @echo " just prod clean Remove all data" @echo "" + @echo "Production Secure (SaaS with multitenancy):" + @echo " just generate-certs Generate TLS certificates" + @echo " just prod-secure Start with security & multitenancy" + @echo " just prod-secure stop Stop secure production" + @echo " just prod-secure logs View logs" + @echo " just prod-secure clean Remove all data" + @echo "" @echo "Infrastructure:" @echo " just infra up Start infrastructure only" @echo " just infra down Stop infrastructure" diff --git a/pm2.config.cjs b/pm2.config.cjs index 4be50e8d..574751f7 100644 --- a/pm2.config.cjs +++ b/pm2.config.cjs @@ -196,6 +196,41 @@ function loadFrontendEnv() { const frontendEnv = loadFrontendEnv(); +// Load worker .env file for OpenSearch and other worker-specific variables +function loadWorkerEnv() { + const envPath = path.join(__dirname, 'worker', '.env'); + const env = {}; + + try { + if (fs.existsSync(envPath)) { + const envContent = fs.readFileSync(envPath, 'utf-8'); + envContent.split('\n').forEach((line) => { + const trimmed = line.trim(); + // Skip comments and empty lines + if (!trimmed || trimmed.startsWith('#')) { + return; + } + const match = trimmed.match(/^([^=]+)=(.*)$/); + if (match) { + const key = match[1].trim(); + let value = match[2].trim(); + // Remove surrounding quotes if present + if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) { + value = value.slice(1, -1); + } + env[key] = value; + } + }); + } + } catch (err) { + console.warn('Failed to load worker .env file:', err.message); + } + + return env; +} + +const workerEnv = loadWorkerEnv(); + // Determine environment from NODE_ENV or SHIPSEC_ENV const environment = process.env.SHIPSEC_ENV || process.env.NODE_ENV || 'development'; const isProduction = 
environment === 'production'; @@ -264,6 +299,7 @@ module.exports = { env_file: __dirname + '/worker/.env', env: Object.assign( { + ...workerEnv, // Load worker .env file (includes OPENSEARCH_URL, etc.) ...currentEnvConfig, NAPI_RS_FORCE_WASI: '1', INTERNAL_SERVICE_TOKEN: process.env.INTERNAL_SERVICE_TOKEN || 'local-internal-token', @@ -290,6 +326,7 @@ module.exports = { env_file: __dirname + '/worker/.env', env: Object.assign( { + ...workerEnv, // Load worker .env file (includes OPENSEARCH_URL, etc.) TEMPORAL_TASK_QUEUE: 'test-worker-integration', TEMPORAL_NAMESPACE: 'shipsec-dev', NODE_ENV: 'development', From 1ce1499104b58cd6582eacd3599e6aa8cc01d2a9 Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Mon, 26 Jan 2026 23:28:06 -0500 Subject: [PATCH 05/14] feat(workflows): add STALE status and workflow improvements - Add STALE status for orphaned run records (DB/Temporal mismatch) - Improve status inference from trace events when Temporal not found - Use correct TraceEventType values for status detection - Add amber badge color for STALE status - Extract WorkflowNode into modular directory structure - Document all execution statuses with transition diagram Signed-off-by: Aseem Shrey --- backend/src/workflows/workflows.service.ts | 154 ++++++++++++++++----- docs/workflows/execution-status.md | 101 ++++++++++++++ frontend/src/utils/statusBadgeStyles.ts | 1 + packages/shared/src/execution.ts | 18 ++- 4 files changed, 241 insertions(+), 33 deletions(-) create mode 100644 docs/workflows/execution-status.md diff --git a/backend/src/workflows/workflows.service.ts b/backend/src/workflows/workflows.service.ts index 39768c82..bf44fe5d 100644 --- a/backend/src/workflows/workflows.service.ts +++ b/backend/src/workflows/workflows.service.ts @@ -8,6 +8,7 @@ import { BadRequestException, } from '@nestjs/common'; import { status as grpcStatus, type ServiceError } from '@grpc/grpc-js'; +import { WorkflowNotFoundError } from '@temporalio/client'; import { z } from 'zod'; import { compileWorkflowGraph } from '../dsl/compiler'; @@ -562,11 +563,12 @@ export class WorkflowsService { const graph = (version?.graph ?? workflow?.graph) as { nodes?: unknown[] } | undefined; const nodeCount = graph?.nodes && Array.isArray(graph.nodes) ? 
graph.nodes.length : 0; - const eventCount = await this.traceRepository.countByType( - run.runId, - 'NODE_STARTED', - organizationId, - ); + // Get trace event counts for status inference + const [startedActions, completedActions, failedActions] = await Promise.all([ + this.traceRepository.countByType(run.runId, 'NODE_STARTED', organizationId), + this.traceRepository.countByType(run.runId, 'NODE_COMPLETED', organizationId), + this.traceRepository.countByType(run.runId, 'NODE_FAILED', organizationId), + ]); // Calculate duration from events (more accurate than createdAt/updatedAt) const eventTimeRange = await this.traceRepository.getEventTimeRange(run.runId, organizationId); @@ -583,22 +585,19 @@ export class WorkflowsService { }); currentStatus = this.normalizeStatus(status.status); } catch (error) { - // If Temporal can't find the workflow (NOT_FOUND), check if events have stopped - // If events stopped more than 5 minutes ago, assume the workflow completed - const isNotFound = this.isNotFoundError(error); - if (isNotFound && eventTimeRange.lastTimestamp) { - const lastEventTime = new Date(eventTimeRange.lastTimestamp); - const minutesSinceLastEvent = (Date.now() - lastEventTime.getTime()) / (1000 * 60); - if (minutesSinceLastEvent > 5) { - // Events stopped more than 5 minutes ago and Temporal can't find it - // Assume the workflow completed successfully - currentStatus = 'COMPLETED'; - this.logger.log( - `Run ${run.runId} not found in Temporal but last event was ${minutesSinceLastEvent.toFixed(1)} minutes ago, assuming COMPLETED`, - ); - } else { - this.logger.warn(`Failed to get status for run ${run.runId}: ${error}`); - } + // If Temporal can't find the workflow, infer status from trace events + if (this.isNotFoundError(error)) { + currentStatus = this.inferStatusFromTraceEvents({ + runId: run.runId, + totalActions: run.totalActions ?? nodeCount, + completedActions, + failedActions, + startedActions, + }); + this.logger.log( + `Run ${run.runId} not found in Temporal, inferred status: ${currentStatus} ` + + `(started=${startedActions}, completed=${completedActions}, failed=${failedActions})`, + ); } else { this.logger.warn(`Failed to get status for run ${run.runId}: ${error}`); } @@ -622,7 +621,7 @@ export class WorkflowsService { endTime: run.updatedAt ?? null, temporalRunId: run.temporalRunId ?? undefined, workflowName, - eventCount, + eventCount: startedActions, nodeCount, duration, triggerType, @@ -1068,19 +1067,59 @@ export class WorkflowsService { this.logger.log( `Fetching status for workflow run ${runId} (temporalRunId=${temporalRunId ?? 
'latest'})`,
     );
-    const temporalStatus = await this.temporalService.describeWorkflow({
-      workflowId: runId,
-      runId: temporalRunId,
-    });
     const { organizationId, run } = await this.requireRunAccess(runId, auth);
+    let temporalStatus: Awaited<ReturnType<TemporalService['describeWorkflow']>>;
     let completedActions = 0;
+    let failedActions = 0;
+    let startedActions = 0;
+
+    // Pre-fetch trace event counts for status inference
     if (run.totalActions && run.totalActions > 0) {
-      completedActions = await this.traceRepository.countByType(
-        runId,
-        'NODE_COMPLETED',
-        organizationId,
-      );
+      [completedActions, failedActions, startedActions] = await Promise.all([
+        this.traceRepository.countByType(runId, 'NODE_COMPLETED', organizationId),
+        this.traceRepository.countByType(runId, 'NODE_FAILED', organizationId),
+        this.traceRepository.countByType(runId, 'NODE_STARTED', organizationId),
+      ]);
+    }
+
+    try {
+      temporalStatus = await this.temporalService.describeWorkflow({
+        workflowId: runId,
+        runId: temporalRunId,
+      });
+    } catch (error) {
+      // If Temporal can't find the workflow, infer status from trace events
+      if (this.isNotFoundError(error)) {
+        const inferredStatus = this.inferStatusFromTraceEvents({
+          runId,
+          totalActions: run.totalActions ?? 0,
+          completedActions,
+          failedActions,
+          startedActions,
+        });
+
+        this.logger.log(
+          `Workflow ${runId} not found in Temporal, inferred status: ${inferredStatus} ` +
+            `(started=${startedActions}, completed=${completedActions}, failed=${failedActions}, total=${run.totalActions})`,
+        );
+
+        temporalStatus = {
+          workflowId: runId,
+          runId: temporalRunId ?? runId,
+          // Cast to WorkflowExecutionStatusName - normalizeStatus handles mapping
+          status: inferredStatus as unknown as typeof temporalStatus.status,
+          startTime: run.createdAt.toISOString(),
+          // Only set closeTime for terminal states that actually ran
+          closeTime: ['COMPLETED', 'FAILED'].includes(inferredStatus)
+            ? new Date().toISOString()
+            : undefined,
+          historyLength: 0,
+          taskQueue: '',
+        };
+      } else {
+        throw error;
+      }
     }
 
     const statusPayload = this.mapTemporalStatus(runId, temporalStatus, run, completedActions);
@@ -1535,15 +1574,66 @@ export class WorkflowsService {
     }
   }
 
-  private isNotFoundError(error: unknown): error is ServiceError {
+  private isNotFoundError(error: unknown): boolean {
     if (!error || typeof error !== 'object') {
       return false;
     }
 
+    // Check for Temporal WorkflowNotFoundError
+    if (error instanceof WorkflowNotFoundError) {
+      return true;
+    }
+
+    // Check for gRPC NOT_FOUND error
     const serviceError = error as ServiceError;
     return serviceError.code === grpcStatus.NOT_FOUND;
   }
 
+  /**
+   * Infer workflow status from trace events when Temporal workflow is not found.
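+   * Typically this means Temporal was reset while the application database kept the
+   * old run record, or the run record was created but the workflow never started.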
+ * + * Cases: + * - No started events → STALE (orphaned record - run exists but never executed) + * - All nodes completed → COMPLETED + * - Any node failed → FAILED + * - Partial completion (some started, not all finished) → FAILED (crashed/lost) + */ + private inferStatusFromTraceEvents(params: { + runId: string; + totalActions: number; + completedActions: number; + failedActions: number; + startedActions: number; + }): ExecutionStatus { + const { totalActions, completedActions, failedActions, startedActions } = params; + + // Case 1: No events at all - orphaned record (DB/Temporal mismatch) + // This indicates data inconsistency - run record exists but workflow never executed + if (startedActions === 0) { + return 'STALE'; + } + + // Case 2: Any node failed explicitly + if (failedActions > 0) { + return 'FAILED'; + } + + // Case 3: All nodes completed successfully + if (totalActions > 0 && completedActions >= totalActions) { + return 'COMPLETED'; + } + + // Case 4: Some nodes started but not all completed and no failures + // This means the workflow crashed or was lost - treat as FAILED + if (startedActions > 0 && completedActions < totalActions) { + return 'FAILED'; + } + + // Fallback: we have events but can't determine status + // This shouldn't happen normally, but default to FAILED for safety + return 'FAILED'; + } + private buildFailure(status: ExecutionStatus, failure?: unknown): FailureSummary | undefined { if (!['FAILED', 'TERMINATED', 'TIMED_OUT'].includes(status)) { return undefined; diff --git a/docs/workflows/execution-status.md b/docs/workflows/execution-status.md new file mode 100644 index 00000000..1ac61aba --- /dev/null +++ b/docs/workflows/execution-status.md @@ -0,0 +1,101 @@ +# Workflow Execution Status + +This document describes the different execution statuses a workflow run can have and when each status applies. + +## Status Overview + +| Status | Color | Description | +|--------|-------|-------------| +| `QUEUED` | Blue | Workflow is waiting to be executed | +| `RUNNING` | Blue | Workflow is actively executing | +| `COMPLETED` | Green | Workflow finished successfully - all nodes completed | +| `FAILED` | Red | Workflow failed - at least one node failed or workflow crashed | +| `CANCELLED` | Gray | Workflow was cancelled by user | +| `TERMINATED` | Gray | Workflow was forcefully terminated | +| `TIMED_OUT` | Amber | Workflow exceeded maximum execution time | +| `AWAITING_INPUT` | Purple | Workflow is paused waiting for human input | +| `STALE` | Amber | Orphaned record - data inconsistency (see below) | + +## Status Transitions + +``` +QUEUED → RUNNING → COMPLETED + → FAILED + → CANCELLED + → TERMINATED + → TIMED_OUT + → AWAITING_INPUT → RUNNING (when input provided) +``` + +## Detailed Status Descriptions + +### QUEUED +The workflow run has been created and is waiting to start execution. This is the initial state before the Temporal worker picks up the workflow. + +### RUNNING +The workflow is actively executing. At least one node has started processing. + +### COMPLETED +All nodes in the workflow have finished successfully. This is a terminal state. + +**Conditions:** +- All expected nodes have `COMPLETED` trace events +- No `FAILED` trace events + +### FAILED +The workflow encountered an error during execution. This is a terminal state. + +**Conditions:** +- At least one node has a `FAILED` trace event, OR +- Some nodes started but not all completed (workflow crashed/lost) + +### CANCELLED +The user manually cancelled the workflow execution. 
This is a terminal state. + +### TERMINATED +The workflow was forcefully terminated (e.g., via Temporal API). This is a terminal state. + +### TIMED_OUT +The workflow exceeded its maximum allowed execution time. This is a terminal state. + +### AWAITING_INPUT +The workflow has reached a human input node and is waiting for user interaction. The workflow will resume to `RUNNING` when input is provided. + +### STALE +**Special Status - Data Inconsistency Warning** + +The run record exists in the database but there's no evidence it ever executed: +- No trace events in the database +- Temporal has no record of this workflow + +**Common Causes:** +1. **Fresh Temporal instance with old database** - The Temporal server was reset/reinstalled but the application database retained old run records +2. **Failed workflow start** - The backend created a run record but the Temporal workflow failed to start (network error, Temporal unavailable, etc.) +3. **Data migration issues** - Database was migrated without corresponding Temporal data + +**Recommended Action:** +- Review these records and delete them if they represent stale data +- Investigate why the data inconsistency occurred to prevent future occurrences + +## Status Determination Logic + +When querying run status, the system follows this logic: + +1. **Query Temporal** - Get the workflow status from Temporal server +2. **If Temporal returns status** - Use the normalized Temporal status +3. **If Temporal returns NOT_FOUND** - Infer status from trace events: + - No `STARTED` events → `STALE` (orphaned record) + - Any `FAILED` events → `FAILED` + - All nodes have `COMPLETED` events → `COMPLETED` + - Some `STARTED` but incomplete → `FAILED` (crashed) + +## Frontend Badge Colors + +Status badges use these colors for visual distinction: + +- **Blue** (active): `QUEUED`, `RUNNING` +- **Green** (success): `COMPLETED` +- **Red** (error): `FAILED` +- **Amber** (warning): `TIMED_OUT`, `STALE` +- **Gray** (neutral): `CANCELLED`, `TERMINATED` +- **Purple** (attention): `AWAITING_INPUT` diff --git a/frontend/src/utils/statusBadgeStyles.ts b/frontend/src/utils/statusBadgeStyles.ts index ada7699a..6036f4c7 100644 --- a/frontend/src/utils/statusBadgeStyles.ts +++ b/frontend/src/utils/statusBadgeStyles.ts @@ -17,6 +17,7 @@ export const STATUS_COLOR_MAP: Record = { TERMINATED: 'gray', TIMED_OUT: 'amber', AWAITING_INPUT: 'purple', + STALE: 'amber', // Orphaned record - data inconsistency warning }; /** diff --git a/packages/shared/src/execution.ts b/packages/shared/src/execution.ts index 59b7e7ff..83b8e6b9 100644 --- a/packages/shared/src/execution.ts +++ b/packages/shared/src/execution.ts @@ -1,5 +1,20 @@ import { z } from 'zod'; +/** + * Workflow execution status values. 
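+ * STALE is never reported by Temporal itself; the backend infers it when a run
+ * record has no trace events and no matching Temporal workflow execution.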
+ * + * @see docs/workflows/execution-status.md for detailed documentation + * + * - QUEUED: Waiting to execute + * - RUNNING: Actively executing + * - COMPLETED: All nodes finished successfully + * - FAILED: Execution failed (node failure or crash) + * - CANCELLED: User cancelled + * - TERMINATED: Forcefully terminated + * - TIMED_OUT: Exceeded max execution time + * - AWAITING_INPUT: Paused for human input + * - STALE: Orphaned record (data inconsistency) + */ export const EXECUTION_STATUS = [ 'QUEUED', 'RUNNING', @@ -8,7 +23,8 @@ export const EXECUTION_STATUS = [ 'CANCELLED', 'TERMINATED', 'TIMED_OUT', - 'AWAITING_INPUT' + 'AWAITING_INPUT', + 'STALE', ] as const; export type ExecutionStatus = (typeof EXECUTION_STATUS)[number]; From acfdef11da3ff7b0f56dc042ab12a36e692c2892 Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Mon, 26 Jan 2026 23:28:21 -0500 Subject: [PATCH 06/14] feat(analytics): add Security Analytics platform with OpenSearch integration Analytics Sink Component (core.analytics.sink): - Index output data from any upstream node to OpenSearch - Auto-detect asset correlation keys (host, domain, url, ip, etc.) - Fire-and-forget with retry logic (3 attempts, exponential backoff) - Configurable index suffix and fail-on-error modes OpenSearch Integration: - Daily index rotation: security-findings-{orgId}-{YYYY.MM.DD} - Index template with standard metadata fields - Multi-tenant data isolation per organization Analytics API: - POST /api/v1/analytics/query with OpenSearch DSL support - Auto-scope queries to organization's index pattern - Rate limiting: 100 req/min per user - Protected routes require authentication - Session cookie support for analytics route auth UI Integration: - Analytics Settings page with tier-based retention - Dashboards link in sidebar (opens in new tab) - View Analytics button uses Discover app with proper URL state - Uses .keyword fields for exact match filtering Component SDK Extensions: - generateFindingHash() for deduplication - Workflow context (workflowId, workflowName, organizationId) - Results output port on nuclei, trufflehog, supabase-scanner - Support for optional inputs in components Bug fixes: - Fix webhook URLs to include global API prefix (ENG-115) - Add proper connectionType for list variable types - Handle invalid_value errors for placeholder fields Signed-off-by: Aseem Shrey --- .ai/analytics-output-port-design.md | 190 +++++++++ backend/.env.example | 17 + backend/package.json | 7 +- backend/scripts/setup-opensearch.ts | 97 +++++ backend/src/analytics/analytics.controller.ts | 193 +++++++++ backend/src/analytics/analytics.module.ts | 8 +- .../src/analytics/dto/analytics-query.dto.ts | 62 +++ .../analytics/dto/analytics-settings.dto.ts | 72 ++++ .../organization-settings.service.ts | 94 +++++ .../analytics/security-analytics.service.ts | 235 +++++++++++ backend/src/app.controller.ts | 110 +++++- backend/src/app.module.ts | 28 +- .../src/auth/providers/local-auth.provider.ts | 28 +- backend/src/auth/session.utils.ts | 48 +++ backend/src/config/opensearch.client.ts | 53 +++ backend/src/config/opensearch.config.ts | 13 + backend/src/config/opensearch.module.ts | 9 + backend/src/database/migration.guard.ts | 1 + backend/src/database/schema/index.ts | 1 + .../database/schema/organization-settings.ts | 17 + backend/src/dsl/validator.ts | 4 + backend/src/main.ts | 8 + bun.lock | 37 +- docs/analytics.md | 200 ++++++++++ docs/components/core.mdx | 41 ++ docs/development/analytics.mdx | 2 +- docs/development/component-development.mdx | 159 ++++++++ 
docs/development/workflow-analytics.mdx | 353 +++++++++++++++++ docs/docs.json | 1 + docs/installation.mdx | 62 +++ frontend/.env.example | 9 +- frontend/src/App.tsx | 2 + frontend/src/auth/AuthProvider.tsx | 12 +- .../src/components/auth/AdminLoginForm.tsx | 37 +- frontend/src/components/layout/AppLayout.tsx | 61 +++ frontend/src/components/layout/AppTopBar.tsx | 8 + frontend/src/components/layout/TopBar.tsx | 33 ++ frontend/src/config/env.ts | 3 + frontend/src/pages/AnalyticsSettingsPage.tsx | 258 ++++++++++++ frontend/vite.config.ts | 16 +- packages/component-sdk/src/analytics.ts | 66 ++++ packages/component-sdk/src/context.ts | 8 +- packages/component-sdk/src/index.ts | 3 + packages/component-sdk/src/types.ts | 5 + worker/.env.example | 6 + worker/package.json | 1 + worker/src/components/core/analytics-sink.ts | 250 ++++++++++++ worker/src/components/index.ts | 1 + .../security/__tests__/nuclei.test.ts | 3 + .../security/__tests__/trufflehog.test.ts | 38 ++ worker/src/components/security/nuclei.ts | 17 + .../components/security/supabase-scanner.ts | 62 ++- worker/src/components/security/trufflehog.ts | 28 +- .../__tests__/optional-input-handling.test.ts | 279 +++++++++++++ .../activities/run-component.activity.ts | 43 +- worker/src/temporal/types.ts | 1 + worker/src/temporal/workflow-runner.ts | 3 + worker/src/temporal/workflows/index.ts | 1 + worker/src/utils/opensearch-indexer.ts | 374 ++++++++++++++++++ 59 files changed, 3732 insertions(+), 46 deletions(-) create mode 100644 .ai/analytics-output-port-design.md create mode 100644 backend/scripts/setup-opensearch.ts create mode 100644 backend/src/analytics/analytics.controller.ts create mode 100644 backend/src/analytics/dto/analytics-query.dto.ts create mode 100644 backend/src/analytics/dto/analytics-settings.dto.ts create mode 100644 backend/src/analytics/organization-settings.service.ts create mode 100644 backend/src/analytics/security-analytics.service.ts create mode 100644 backend/src/auth/session.utils.ts create mode 100644 backend/src/config/opensearch.client.ts create mode 100644 backend/src/config/opensearch.config.ts create mode 100644 backend/src/config/opensearch.module.ts create mode 100644 backend/src/database/schema/organization-settings.ts create mode 100644 docs/analytics.md create mode 100644 docs/development/workflow-analytics.mdx create mode 100644 frontend/src/pages/AnalyticsSettingsPage.tsx create mode 100644 packages/component-sdk/src/analytics.ts create mode 100644 worker/src/components/core/analytics-sink.ts create mode 100644 worker/src/temporal/__tests__/optional-input-handling.test.ts create mode 100644 worker/src/utils/opensearch-indexer.ts diff --git a/.ai/analytics-output-port-design.md b/.ai/analytics-output-port-design.md new file mode 100644 index 00000000..41ce63cc --- /dev/null +++ b/.ai/analytics-output-port-design.md @@ -0,0 +1,190 @@ +# Analytics Output Port Design + +## Status: Approved +## Date: 2025-01-21 + +## Problem Statement + +When connecting a component's `rawOutput` (which contains complex nested JSON) to the Analytics Sink, OpenSearch hits the default field limit of 1000 fields. This is because: + +1. **Dynamic mapping explosion**: Elasticsearch/OpenSearch creates a field for every unique JSON path +2. **Nested structures**: Arrays with objects like `issues[0].metadata.schema` create many paths +3. 
**Varying schemas**: Different scanner outputs accumulate unique field paths over time + +Example error: +``` +illegal_argument_exception: Limit of total fields [1000] has been exceeded +``` + +## Solution + +### Design Decisions + +1. **Each component owns its analytics schema** + - Components output structured `list` through dedicated ports (`findings`, `results`, `secrets`, `issues`) + - Component authors define the structure appropriate for their tool + - No generic "one schema fits all" approach + +2. **Analytics Sink accepts `list`** + - Input type: `z.array(z.record(z.string(), z.unknown()))` + - Each item in the array is indexed as a separate document + - Rejects arbitrary nested objects (must be an array) + +3. **Same timestamp for all findings in a batch** + - All findings from one component execution share the same `@timestamp` + - Captured once at the start of indexing, applied to all documents + +4. **Nested `shipsec` context** + - Workflow context stored under `shipsec.*` namespace + - Prevents field name collision with component data + - Clear separation: component fields at root, system fields under `shipsec` + +5. **Nested objects serialized before indexing** + - Any nested object or array within a finding is JSON-stringified + - Prevents field explosion from dynamic mapping + - Trade-off: Can't query inside serialized fields directly, but prevents index corruption + +6. **No `data` wrapper** + - Original PRD design wrapped component output in a `data` field + - New design: finding fields are at the top level for easier querying + +### Document Structure + +**Before (PRD design):** +```json +{ + "workflow_id": "...", + "workflow_name": "...", + "run_id": "...", + "node_ref": "...", + "component_id": "...", + "@timestamp": "...", + "asset_key": "...", + "data": { + "check_id": "DB_RLS_DISABLED", + "severity": "CRITICAL", + "metadata": { "schema": "public", "table": "users" } + } +} +``` + +**After (new design):** +```json +{ + "check_id": "DB_RLS_DISABLED", + "severity": "CRITICAL", + "title": "RLS Disabled on Table: users", + "resource": "public.users", + "metadata": "{\"schema\":\"public\",\"table\":\"users\"}", + "scanner": "supabase-scanner", + "asset_key": "abcdefghij1234567890", + "finding_hash": "a1b2c3d4e5f67890", + + "shipsec": { + "organization_id": "org_123", + "run_id": "shipsec-run-xxx", + "workflow_id": "d1d33161-929f-4af4-9a64-xxx", + "workflow_name": "Supabase Security Audit", + "component_id": "core.analytics.sink", + "node_ref": "analytics-sink-1" + }, + + "@timestamp": "2025-01-21T10:30:00.000Z" +} +``` + +### Component Output Ports + +Components should use their existing structured list outputs: + +| Component | Port | Type | Notes | +|-----------|------|------|-------| +| Nuclei | `results` | `z.array(z.record(z.string(), z.unknown()))` | Scanner + asset_key added | +| TruffleHog | `results` | `z.array(z.record(z.string(), z.unknown()))` | Scanner + asset_key added | +| Supabase Scanner | `results` | `z.array(z.record(z.string(), z.unknown()))` | Scanner + asset_key added | + +All `results` ports include: +- `scanner`: Scanner identifier (e.g., `'nuclei'`, `'trufflehog'`, `'supabase-scanner'`) +- `asset_key`: Primary asset identifier from the finding +- `finding_hash`: Stable hash for deduplication (16-char hex from SHA-256) + +### Finding Hash for Deduplication + +The `finding_hash` enables tracking findings across workflow runs: + +**Generation:** +```typescript +import { createHash } from 'crypto'; + +function generateFindingHash(...fields: (string | 
undefined | null)[]): string { + const normalized = fields.map((f) => (f ?? '').toLowerCase().trim()).join('|'); + return createHash('sha256').update(normalized).digest('hex').slice(0, 16); +} +``` + +**Key fields per scanner:** +| Scanner | Hash Fields | +|---------|-------------| +| Nuclei | `templateId + host + matchedAt` | +| TruffleHog | `DetectorType + Redacted + filePath` | +| Supabase Scanner | `check_id + projectRef + resource` | + +**Use cases:** +- **New vs recurring**: Is this finding appearing for the first time? +- **First-seen / last-seen**: When did we first detect this? Is it still present? +- **Resolution tracking**: Findings that stop appearing may be resolved +- **Deduplication**: Remove duplicates in dashboards across runs + +### `shipsec` Context Fields + +The indexer automatically adds these fields under `shipsec`: + +| Field | Description | +|-------|-------------| +| `organization_id` | Organization that owns the workflow | +| `run_id` | Unique identifier for this workflow execution | +| `workflow_id` | ID of the workflow definition | +| `workflow_name` | Human-readable workflow name | +| `component_id` | Component type (e.g., `core.analytics.sink`) | +| `node_ref` | Node reference in the workflow graph | +| `asset_key` | Auto-detected or specified asset identifier | + +### Querying in OpenSearch + +With this structure, users can: +- Filter by organization: `shipsec.organization_id: "org_123"` +- Filter by workflow: `shipsec.workflow_id: "xxx"` +- Filter by run: `shipsec.run_id: "xxx"` +- Filter by asset: `asset_key: "api.example.com"` +- Filter by scanner: `scanner: "nuclei"` +- Filter by component-specific fields: `severity: "CRITICAL"` +- Aggregate by severity: `terms` aggregation on `severity` field +- Track finding history: `finding_hash: "a1b2c3d4" | sort @timestamp` +- Find recurring findings: Group by `finding_hash`, count occurrences + +### Trade-offs + +| Decision | Pro | Con | +|----------|-----|-----| +| Serialize nested objects | Prevents field explosion | Can't query inside serialized fields | +| `shipsec` namespace | No field collision | Slightly more verbose queries | +| No generic schema | Better fit per component | Less consistency across components | +| Same timestamp per batch | Accurate (same scan time) | Can't distinguish individual finding times | + +### Implementation Files + +1. `/worker/src/utils/opensearch-indexer.ts` - Add `shipsec` context, serialize nested objects +2. `/worker/src/components/core/analytics-sink.ts` - Accept `list`, consistent timestamp +3. Component files - Ensure structured output, add `results` port where missing + +### Backward Compatibility + +- Existing workflows connecting `rawOutput` to Analytics Sink will still work +- Analytics Sink continues to accept any data type for backward compatibility +- New `list` processing only triggers when input is an array + +### Future Considerations + +1. **Index templates**: Create OpenSearch index template with explicit mappings for `shipsec.*` fields +2. **Field discovery**: Build UI to show available fields from indexed data +3. 
**Schema validation**: Optional strict mode to validate findings against expected schema diff --git a/backend/.env.example b/backend/.env.example index 824fd962..6e4a2a47 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -43,3 +43,20 @@ PLATFORM_API_URL="" PLATFORM_SERVICE_TOKEN="" # Optional: override request timeout in milliseconds (default 5000) PLATFORM_API_TIMEOUT_MS="" + +# OpenSearch configuration for security analytics indexing +# Optional: if not set, security analytics indexing will be disabled +OPENSEARCH_URL="" +OPENSEARCH_USERNAME="" +OPENSEARCH_PASSWORD="" + +# OpenSearch Dashboards configuration for analytics visualization +# Optional: if not set, Dashboards link will not appear in frontend sidebar +# Example: "http://localhost:5601" or "https://dashboards.example.com" +OPENSEARCH_DASHBOARDS_URL="" + +# Redis configuration for rate limiting and caching +# Optional: if not set, rate limiting will use in-memory storage (not recommended for production) +# For distributed deployments, use a shared Redis instance to ensure rate limits work across multiple backend instances +# Example: "redis://localhost:6379" or "redis://:password@redis.example.com:6379" +REDIS_URL="" diff --git a/backend/package.json b/backend/package.json index e93ade6b..d30a2df2 100644 --- a/backend/package.json +++ b/backend/package.json @@ -14,17 +14,21 @@ "generate:openapi": "bun scripts/generate-openapi.ts", "migration:push": "bun x drizzle-kit push", "migration:smoke": "bun scripts/migration-smoke.ts", - "delete:runs": "bun scripts/delete-all-workflow-runs.ts" + "delete:runs": "bun scripts/delete-all-workflow-runs.ts", + "setup:opensearch": "bun scripts/setup-opensearch.ts" }, "dependencies": { "@clerk/backend": "^2.9.4", "@clerk/types": "^4.81.0", "@grpc/grpc-js": "^1.14.0", + "@nest-lab/throttler-storage-redis": "^1.1.0", "@nestjs/common": "^10.4.0", "@nestjs/config": "^3.2.0", "@nestjs/core": "^10.4.0", "@nestjs/platform-express": "^10.4.0", "@nestjs/swagger": "^11.2.0", + "@nestjs/throttler": "^6.5.0", + "@opensearch-project/opensearch": "^3.5.1", "@shipsec/component-sdk": "workspace:*", "@shipsec/shared": "workspace:*", "@shipsec/studio-worker": "workspace:*", @@ -56,6 +60,7 @@ "@eslint/js": "^9.39.2", "@nestjs/testing": "^10.4.0", "@types/bcryptjs": "^3.0.0", + "@types/cookie-parser": "^1.4.10", "@types/express-serve-static-core": "^4.19.6", "@types/har-format": "^1.2.16", "@types/multer": "^2.0.0", diff --git a/backend/scripts/setup-opensearch.ts b/backend/scripts/setup-opensearch.ts new file mode 100644 index 00000000..7ce319fb --- /dev/null +++ b/backend/scripts/setup-opensearch.ts @@ -0,0 +1,97 @@ +import { Client } from '@opensearch-project/opensearch'; +import { config } from 'dotenv'; + +// Load environment variables +config(); + +async function main() { + const url = process.env.OPENSEARCH_URL; + const username = process.env.OPENSEARCH_USERNAME; + const password = process.env.OPENSEARCH_PASSWORD; + + if (!url) { + console.error('❌ OPENSEARCH_URL environment variable is required'); + process.exit(1); + } + + console.log('🔍 Connecting to OpenSearch...'); + + const client = new Client({ + node: url, + auth: username && password ? 
{ username, password } : undefined, + ssl: { + rejectUnauthorized: process.env.NODE_ENV === 'production', + }, + }); + + try { + // Test connection + const healthCheck = await client.cluster.health(); + console.log(`✅ Connected to OpenSearch cluster (status: ${healthCheck.body.status})`); + + // Create index template for security-findings-* + const templateName = 'security-findings-template'; + console.log(`\n📋 Creating index template: ${templateName}`); + + await client.indices.putIndexTemplate({ + name: templateName, + body: { + index_patterns: ['security-findings-*'], + template: { + settings: { + number_of_shards: 1, + number_of_replicas: 1, + }, + mappings: { + properties: { + '@timestamp': { + type: 'date', + }, + workflow_id: { + type: 'keyword', + }, + workflow_name: { + type: 'keyword', + }, + run_id: { + type: 'keyword', + }, + node_ref: { + type: 'keyword', + }, + component_id: { + type: 'keyword', + }, + asset_key: { + type: 'keyword', + }, + data: { + type: 'object', + dynamic: true, + }, + }, + }, + }, + }, + }); + + console.log(`✅ Index template '${templateName}' created successfully`); + console.log('\n📊 Template configuration:'); + console.log(' - Index pattern: security-findings-*'); + console.log(' - Shards: 1, Replicas: 1'); + console.log(' - Mappings: @timestamp (date), workflow_id (keyword), workflow_name (keyword),'); + console.log(' run_id (keyword), node_ref (keyword), component_id (keyword),'); + console.log(' asset_key (keyword), data (object with dynamic: true)'); + console.log('\n🎉 OpenSearch setup completed successfully!'); + } catch (error) { + console.error('❌ OpenSearch setup failed'); + console.error(error); + process.exit(1); + } +} + +main().catch((error) => { + console.error('❌ Unexpected error during OpenSearch setup'); + console.error(error); + process.exit(1); +}); diff --git a/backend/src/analytics/analytics.controller.ts b/backend/src/analytics/analytics.controller.ts new file mode 100644 index 00000000..88275961 --- /dev/null +++ b/backend/src/analytics/analytics.controller.ts @@ -0,0 +1,193 @@ +import { + BadRequestException, + Body, + Controller, + ForbiddenException, + Get, + Post, + Put, + UnauthorizedException, +} from '@nestjs/common'; +import { ApiOkResponse, ApiTags, ApiHeader } from '@nestjs/swagger'; +import { Throttle } from '@nestjs/throttler'; + +import { SecurityAnalyticsService } from './security-analytics.service'; +import { OrganizationSettingsService } from './organization-settings.service'; +import { AnalyticsQueryRequestDto, AnalyticsQueryResponseDto } from './dto/analytics-query.dto'; +import { + AnalyticsSettingsResponseDto, + UpdateAnalyticsSettingsDto, + TIER_LIMITS, +} from './dto/analytics-settings.dto'; +import { CurrentAuth } from '../auth/auth-context.decorator'; +import type { AuthContext } from '../auth/types'; + +@ApiTags('analytics') +@Controller('analytics') +export class AnalyticsController { + constructor( + private readonly securityAnalyticsService: SecurityAnalyticsService, + private readonly organizationSettingsService: OrganizationSettingsService, + ) {} + + @Post('query') + @Throttle({ default: { limit: 100, ttl: 60000 } }) // 100 requests per minute per user + @ApiOkResponse({ + description: 'Query analytics data for the authenticated organization', + type: AnalyticsQueryResponseDto, + }) + @ApiHeader({ + name: 'X-RateLimit-Limit', + description: 'Maximum number of requests allowed per minute', + schema: { type: 'integer', example: 100 }, + }) + @ApiHeader({ + name: 'X-RateLimit-Remaining', + description: 
'Number of requests remaining in the current time window',
+    schema: { type: 'integer', example: 99 },
+  })
+  async queryAnalytics(
+    @CurrentAuth() auth: AuthContext | null,
+    @Body() queryDto: AnalyticsQueryRequestDto,
+  ): Promise<AnalyticsQueryResponseDto> {
+    // Require authentication
+    if (!auth || !auth.isAuthenticated) {
+      throw new UnauthorizedException('Authentication required');
+    }
+
+    // Require organization context
+    if (!auth.organizationId) {
+      throw new UnauthorizedException('Organization context required');
+    }
+
+    // Validate query syntax
+    if (queryDto.query && typeof queryDto.query !== 'object') {
+      throw new BadRequestException('Invalid query syntax: query must be an object');
+    }
+
+    if (queryDto.aggs && typeof queryDto.aggs !== 'object') {
+      throw new BadRequestException('Invalid query syntax: aggs must be an object');
+    }
+
+    // Set defaults
+    const size = queryDto.size ?? 10;
+    const from = queryDto.from ?? 0;
+
+    // Call the service to execute the query
+    return this.securityAnalyticsService.query(auth.organizationId, {
+      query: queryDto.query,
+      size,
+      from,
+      aggs: queryDto.aggs,
+    });
+  }
+
+  @Get('settings')
+  @ApiOkResponse({
+    description: 'Get analytics settings for the authenticated organization',
+    type: AnalyticsSettingsResponseDto,
+  })
+  async getAnalyticsSettings(
+    @CurrentAuth() auth: AuthContext | null,
+  ): Promise<AnalyticsSettingsResponseDto> {
+    // Require authentication
+    if (!auth || !auth.isAuthenticated) {
+      throw new UnauthorizedException('Authentication required');
+    }
+
+    // Require organization context
+    if (!auth.organizationId) {
+      throw new UnauthorizedException('Organization context required');
+    }
+
+    // Get or create organization settings
+    const settings = await this.organizationSettingsService.getOrganizationSettings(
+      auth.organizationId,
+    );
+
+    // Get max retention days for tier
+    const maxRetentionDays = this.organizationSettingsService.getMaxRetentionDays(
+      settings.subscriptionTier,
+    );
+
+    return {
+      organizationId: settings.organizationId,
+      subscriptionTier: settings.subscriptionTier,
+      analyticsRetentionDays: settings.analyticsRetentionDays,
+      maxRetentionDays,
+      createdAt: settings.createdAt,
+      updatedAt: settings.updatedAt,
+    };
+  }
+
+  @Put('settings')
+  @ApiOkResponse({
+    description: 'Update analytics settings for the authenticated organization',
+    type: AnalyticsSettingsResponseDto,
+  })
+  async updateAnalyticsSettings(
+    @CurrentAuth() auth: AuthContext | null,
+    @Body() updateDto: UpdateAnalyticsSettingsDto,
+  ): Promise<AnalyticsSettingsResponseDto> {
+    // Require authentication
+    if (!auth || !auth.isAuthenticated) {
+      throw new UnauthorizedException('Authentication required');
+    }
+
+    // Require organization context
+    if (!auth.organizationId) {
+      throw new UnauthorizedException('Organization context required');
+    }
+
+    // Only org admins can update settings
+    if (!auth.roles.includes('ADMIN')) {
+      throw new ForbiddenException('Only organization admins can update analytics settings');
+    }
+
+    // Get current settings to validate against tier
+    const currentSettings = await this.organizationSettingsService.getOrganizationSettings(
+      auth.organizationId,
+    );
+
+    // Determine the tier to validate against (use new tier if provided, otherwise current)
+    const tierToValidate = updateDto.subscriptionTier ?? currentSettings.subscriptionTier;
+
+    // Validate retention period is within tier limits
+    if (updateDto.analyticsRetentionDays !== undefined) {
+      const isValid = this.organizationSettingsService.validateRetentionPeriod(
+        tierToValidate,
+        updateDto.analyticsRetentionDays,
+      );
+
+      if (!isValid) {
+        const maxDays = TIER_LIMITS[tierToValidate].maxRetentionDays;
+        throw new BadRequestException(
+          `Retention period of ${updateDto.analyticsRetentionDays} days exceeds the limit for ${TIER_LIMITS[tierToValidate].name} tier (${maxDays} days)`,
+        );
+      }
+    }
+
+    // Update settings
+    const updated = await this.organizationSettingsService.updateOrganizationSettings(
+      auth.organizationId,
+      {
+        analyticsRetentionDays: updateDto.analyticsRetentionDays,
+        subscriptionTier: updateDto.subscriptionTier,
+      },
+    );
+
+    // Get max retention days for updated tier
+    const maxRetentionDays = this.organizationSettingsService.getMaxRetentionDays(
+      updated.subscriptionTier,
+    );
+
+    return {
+      organizationId: updated.organizationId,
+      subscriptionTier: updated.subscriptionTier,
+      analyticsRetentionDays: updated.analyticsRetentionDays,
+      maxRetentionDays,
+      createdAt: updated.createdAt,
+      updatedAt: updated.updatedAt,
+    };
+  }
+}
diff --git a/backend/src/analytics/analytics.module.ts b/backend/src/analytics/analytics.module.ts
index ee64fd92..df287cc4 100644
--- a/backend/src/analytics/analytics.module.ts
+++ b/backend/src/analytics/analytics.module.ts
@@ -1,8 +1,12 @@
 import { Module } from '@nestjs/common';
 import { AnalyticsService } from './analytics.service';
+import { SecurityAnalyticsService } from './security-analytics.service';
+import { OrganizationSettingsService } from './organization-settings.service';
+import { AnalyticsController } from './analytics.controller';
 
 @Module({
-  providers: [AnalyticsService],
-  exports: [AnalyticsService],
+  controllers: [AnalyticsController],
+  providers: [AnalyticsService, SecurityAnalyticsService, OrganizationSettingsService],
+  exports: [AnalyticsService, SecurityAnalyticsService, OrganizationSettingsService],
 })
 export class AnalyticsModule {}
diff --git a/backend/src/analytics/dto/analytics-query.dto.ts b/backend/src/analytics/dto/analytics-query.dto.ts
new file mode 100644
index 00000000..dbe54053
--- /dev/null
+++ b/backend/src/analytics/dto/analytics-query.dto.ts
@@ -0,0 +1,62 @@
+import { ApiProperty } from '@nestjs/swagger';
+
+export class AnalyticsQueryRequestDto {
+  @ApiProperty({
+    description: 'OpenSearch DSL query object',
+    example: { match_all: {} },
+    required: false,
+  })
+  query?: Record<string, unknown>;
+
+  @ApiProperty({
+    description: 'Number of results to return',
+    example: 10,
+    default: 10,
+    required: false,
+  })
+  size?: number;
+
+  @ApiProperty({
+    description: 'Offset for pagination',
+    example: 0,
+    default: 0,
+    required: false,
+  })
+  from?: number;
+
+  @ApiProperty({
+    description: 'OpenSearch aggregations object',
+    example: {
+      components: {
+        terms: { field: 'component_id' },
+      },
+    },
+    required: false,
+  })
+  aggs?: Record<string, unknown>;
+}
+
+export class AnalyticsQueryResponseDto {
+  @ApiProperty({
+    description: 'Total number of matching documents',
+    example: 100,
+  })
+  total!: number;
+
+  @ApiProperty({
+    description: 'Search hits',
+    type: 'array',
+    items: { type: 'object' },
+  })
+  hits!: {
+    _id: string;
+    _source: Record<string, unknown>;
+    _score?: number;
+  }[];
+
+  @ApiProperty({
+    description: 'Aggregation results',
+    required: false,
+  })
+  aggregations?: Record<string, unknown>;
+}
diff --git a/backend/src/analytics/dto/analytics-settings.dto.ts b/backend/src/analytics/dto/analytics-settings.dto.ts
new file mode 100644
index 00000000..824b302c
--- /dev/null
+++ b/backend/src/analytics/dto/analytics-settings.dto.ts
@@ -0,0 +1,72 @@
+import { ApiProperty } from '@nestjs/swagger';
+import { IsEnum, IsInt, Min, Max } from 'class-validator';
+import type { SubscriptionTier } from '../../database/schema/organization-settings';
+
+export type { SubscriptionTier };
+
+export const TIER_LIMITS: Record<SubscriptionTier, { name: string; maxRetentionDays: number }> = {
+  free: { name: 'Free', maxRetentionDays: 30 },
+  pro: { name: 'Pro', maxRetentionDays: 90 },
+  enterprise: { name: 'Enterprise', maxRetentionDays: 365 },
+};
+
+export class AnalyticsSettingsResponseDto {
+  @ApiProperty({
+    description: 'Organization ID',
+    example: 'org_abc123',
+  })
+  organizationId!: string;
+
+  @ApiProperty({
+    description: 'Subscription tier',
+    enum: ['free', 'pro', 'enterprise'],
+    example: 'free',
+  })
+  subscriptionTier!: SubscriptionTier;
+
+  @ApiProperty({
+    description: 'Data retention period in days',
+    example: 30,
+  })
+  analyticsRetentionDays!: number;
+
+  @ApiProperty({
+    description: 'Maximum retention days allowed for this tier',
+    example: 30,
+  })
+  maxRetentionDays!: number;
+
+  @ApiProperty({
+    description: 'Timestamp when settings were created',
+    example: '2026-01-20T00:00:00.000Z',
+  })
+  createdAt!: Date;
+
+  @ApiProperty({
+    description: 'Timestamp when settings were last updated',
+    example: '2026-01-20T00:00:00.000Z',
+  })
+  updatedAt!: Date;
+}
+
+export class UpdateAnalyticsSettingsDto {
+  @ApiProperty({
+    description: 'Data retention period in days (must be within tier limits)',
+    example: 30,
+    minimum: 1,
+    maximum: 365,
+  })
+  @IsInt()
+  @Min(1)
+  @Max(365)
+  analyticsRetentionDays!: number;
+
+  // Optional: allow updating subscription tier (if needed in the future)
+  @ApiProperty({
+    description: 'Subscription tier (optional - usually set by billing system)',
+    enum: ['free', 'pro', 'enterprise'],
+    required: false,
+  })
+  @IsEnum(['free', 'pro', 'enterprise'])
+  subscriptionTier?: SubscriptionTier;
+}
diff --git a/backend/src/analytics/organization-settings.service.ts b/backend/src/analytics/organization-settings.service.ts
new file mode 100644
index 00000000..b26a612c
--- /dev/null
+++ b/backend/src/analytics/organization-settings.service.ts
@@ -0,0 +1,94 @@
+import { Inject, Injectable, Logger } from '@nestjs/common';
+import { eq } from 'drizzle-orm';
+import { type NodePgDatabase } from 'drizzle-orm/node-postgres';
+
+import { DRIZZLE_TOKEN } from '../database/database.module';
+import {
+  organizationSettingsTable,
+  OrganizationSettings,
+  SubscriptionTier,
+} from '../database/schema/organization-settings';
+import { TIER_LIMITS } from './dto/analytics-settings.dto';
+
+@Injectable()
+export class OrganizationSettingsService {
+  private readonly logger = new Logger(OrganizationSettingsService.name);
+
+  constructor(
+    @Inject(DRIZZLE_TOKEN)
+    private readonly db: NodePgDatabase,
+  ) {}
+
+  /**
+   * Get or create organization settings
+   */
+  async getOrganizationSettings(organizationId: string): Promise<OrganizationSettings> {
+    // Try to get existing settings
+    const [existing] = await this.db
+      .select()
+      .from(organizationSettingsTable)
+      .where(eq(organizationSettingsTable.organizationId, organizationId));
+
+    if (existing) {
+      return existing;
+    }
+
+    // Create default settings if they don't exist
+    this.logger.log(`Creating default settings for organization: ${organizationId}`);
+    const [created] = await this.db
+      .insert(organizationSettingsTable)
+      .values({
+        organizationId,
+        subscriptionTier: 'free',
+        analyticsRetentionDays: 30,
+      })
+      .returning();
+
+    return created;
+  }
+
+  /**
+   * Update organization settings
+   */
+  async updateOrganizationSettings(
+    organizationId: string,
+    updates: {
+      analyticsRetentionDays?: number;
+      subscriptionTier?: SubscriptionTier;
+    },
+  ): Promise<OrganizationSettings> {
+    // Ensure settings exist
+    await this.getOrganizationSettings(organizationId);
+
+    // Update settings
+    const [updated] = await this.db
+      .update(organizationSettingsTable)
+      .set({
+        ...updates,
+        updatedAt: new Date(),
+      })
+      .where(eq(organizationSettingsTable.organizationId, organizationId))
+      .returning();
+
+    this.logger.log(
+      `Updated settings for organization ${organizationId}: ${JSON.stringify(updates)}`,
+    );
+
+    return updated;
+  }
+
+  /**
+   * Validate retention period is within tier limits
+   */
+  validateRetentionPeriod(tier: SubscriptionTier, retentionDays: number): boolean {
+    const limit = TIER_LIMITS[tier];
+    return retentionDays <= limit.maxRetentionDays && retentionDays > 0;
+  }
+
+  /**
+   * Get max retention days for a tier
+   */
+  getMaxRetentionDays(tier: SubscriptionTier): number {
+    return TIER_LIMITS[tier].maxRetentionDays;
+  }
+}
diff --git a/backend/src/analytics/security-analytics.service.ts b/backend/src/analytics/security-analytics.service.ts
new file mode 100644
index 00000000..c31b6a42
--- /dev/null
+++ b/backend/src/analytics/security-analytics.service.ts
@@ -0,0 +1,235 @@
+import { Injectable, Logger } from '@nestjs/common';
+import { OpenSearchClient } from '../config/opensearch.client';
+
+interface IndexDocumentOptions {
+  workflowId: string;
+  workflowName: string;
+  runId: string;
+  nodeRef: string;
+  componentId: string;
+  assetKeyField?: string;
+  indexSuffix?: string;
+}
+
+type BulkIndexOptions = IndexDocumentOptions;
+
+@Injectable()
+export class SecurityAnalyticsService {
+  private readonly logger = new Logger(SecurityAnalyticsService.name);
+
+  constructor(private readonly openSearchClient: OpenSearchClient) {}
+
+  /**
+   * Index a single document to OpenSearch with metadata
+   */
+  async indexDocument(
+    orgId: string,
+    document: Record<string, unknown>,
+    options: IndexDocumentOptions,
+  ): Promise<void> {
+    if (!this.openSearchClient.isClientEnabled()) {
+      this.logger.debug('OpenSearch client not enabled, skipping indexing');
+      return;
+    }
+
+    const client = this.openSearchClient.getClient();
+    if (!client) {
+      this.logger.warn('OpenSearch client is null, skipping indexing');
+      return;
+    }
+
+    try {
+      const indexName = this.buildIndexName(orgId, options.indexSuffix);
+      const assetKey = this.detectAssetKey(document, options.assetKeyField);
+
+      const enrichedDocument = {
+        ...document,
+        '@timestamp': new Date().toISOString(),
+        workflow_id: options.workflowId,
+        workflow_name: options.workflowName,
+        run_id: options.runId,
+        node_ref: options.nodeRef,
+        component_id: options.componentId,
+        ...(assetKey && { asset_key: assetKey }),
+      };
+
+      await client.index({
+        index: indexName,
+        body: enrichedDocument,
+      });
+
+      this.logger.debug(`Indexed document to ${indexName} for workflow ${options.workflowId}`);
+    } catch (error) {
+      this.logger.error(`Failed to index document: ${error}`);
+      throw error;
+    }
+  }
+
+  /**
+   * Bulk index multiple documents to OpenSearch
+   */
+  async bulkIndex(
+    orgId: string,
+    documents: Record<string, unknown>[],
+    options: BulkIndexOptions,
+  ): Promise<void> {
+    if (!this.openSearchClient.isClientEnabled()) {
+      this.logger.debug('OpenSearch client not enabled, skipping bulk indexing');
+      return;
+    }
+
+    const client = this.openSearchClient.getClient();
+    if (!client) {
+      this.logger.warn('OpenSearch client is null, skipping bulk indexing');
+      return;
+    }
+
+    if (documents.length === 0) {
+      this.logger.debug('No documents to index, skipping bulk indexing');
+      return;
+    }
+
+    try {
+      const indexName = this.buildIndexName(orgId, options.indexSuffix);
+
+      // Build bulk operations array
+      const bulkOps: any[] = [];
+      for (const document of documents) {
+        const assetKey = this.detectAssetKey(document, options.assetKeyField);
+
+        const enrichedDocument = {
+          ...document,
+          '@timestamp': new Date().toISOString(),
+          workflow_id: options.workflowId,
+          workflow_name: options.workflowName,
+          run_id: options.runId,
+          node_ref: options.nodeRef,
+          component_id: options.componentId,
+          ...(assetKey && { asset_key: assetKey }),
+        };
+
+        bulkOps.push({ index: { _index: indexName } });
+        bulkOps.push(enrichedDocument);
+      }
+
+      const response = await client.bulk({
+        body: bulkOps,
+      });
+
+      if (response.body.errors) {
+        const errorCount = response.body.items.filter((item: any) => item.index?.error).length;
+        this.logger.warn(
+          `Bulk indexing completed with ${errorCount} errors out of ${documents.length} documents`,
+        );
+      } else {
+        this.logger.debug(
+          `Bulk indexed ${documents.length} documents to ${indexName} for workflow ${options.workflowId}`,
+        );
+      }
+    } catch (error) {
+      this.logger.error(`Failed to bulk index documents: ${error}`);
+      throw error;
+    }
+  }
+
+  /**
+   * Build the index name with org scoping and date-based rotation
+   * Format: security-findings-{orgId}-{YYYY.MM.DD}
+   */
+  private buildIndexName(orgId: string, indexSuffix?: string): string {
+    const date = new Date();
+    const year = date.getFullYear();
+    const month = String(date.getMonth() + 1).padStart(2, '0');
+    const day = String(date.getDate()).padStart(2, '0');
+
+    const suffix = indexSuffix || `${year}.${month}.${day}`;
+    return `security-findings-${orgId}-${suffix}`;
+  }
+
+  /**
+   * Query analytics data for an organization
+   */
+  async query(
+    orgId: string,
+    options: {
+      query?: Record<string, unknown>;
+      size?: number;
+      from?: number;
+      aggs?: Record<string, unknown>;
+    },
+  ): Promise<{
+    total: number;
+    hits: { _id: string; _source: Record<string, unknown>; _score?: number }[];
+    aggregations?: Record<string, unknown>;
+  }> {
+    if (!this.openSearchClient.isClientEnabled()) {
+      this.logger.warn('OpenSearch client not enabled, returning empty results');
+      return { total: 0, hits: [], aggregations: undefined };
+    }
+
+    const client = this.openSearchClient.getClient();
+    if (!client) {
+      this.logger.warn('OpenSearch client is null, returning empty results');
+      return { total: 0, hits: [], aggregations: undefined };
+    }
+
+    try {
+      // Build index pattern for org: security-findings-{orgId}-*
+      const indexPattern = `security-findings-${orgId}-*`;
+
+      // Execute the search
+      const response = await client.search({
+        index: indexPattern,
+        body: {
+          query: options.query || { match_all: {} },
+          size: options.size ?? 10,
+          from: options.from ?? 0,
+          ...(options.aggs && { aggs: options.aggs }),
+        },
+      });
+
+      // Extract results from OpenSearch response
+      const total: number =
+        typeof response.body.hits.total === 'object'
+          ? (response.body.hits.total.value ?? 0)
+          : (response.body.hits.total ?? 0);
0); + + const hits = response.body.hits.hits.map((hit: any) => ({ + _id: hit._id, + _source: hit._source, + ...(hit._score !== undefined && { _score: hit._score }), + })); + + return { + total, + hits, + aggregations: response.body.aggregations, + }; + } catch (error) { + this.logger.error(`Failed to query analytics data: ${error}`); + throw error; + } + } + + /** + * Auto-detect asset key from common fields + * Priority: host > domain > subdomain > url > ip > asset > target + */ + private detectAssetKey(document: Record, explicitField?: string): string | null { + // If explicit field is provided, use it + if (explicitField && document[explicitField]) { + return String(document[explicitField]); + } + + // Auto-detect from common fields + const assetFields = ['host', 'domain', 'subdomain', 'url', 'ip', 'asset', 'target']; + + for (const field of assetFields) { + if (document[field]) { + return String(document[field]); + } + } + + return null; + } +} diff --git a/backend/src/app.controller.ts b/backend/src/app.controller.ts index 681adbac..379c7e50 100644 --- a/backend/src/app.controller.ts +++ b/backend/src/app.controller.ts @@ -1,13 +1,119 @@ -import { Controller, Get } from '@nestjs/common'; +import { Controller, Get, Post, Res, UnauthorizedException, Headers } from '@nestjs/common'; +import { ConfigService } from '@nestjs/config'; +import { SkipThrottle } from '@nestjs/throttler'; +import type { Response } from 'express'; import { AppService } from './app.service'; +import { CurrentAuth } from './auth/auth-context.decorator'; +import type { AuthContext } from './auth/types'; +import { Public } from './auth/public.decorator'; +import type { AuthConfig } from './config/auth.config'; +import { + SESSION_COOKIE_NAME, + SESSION_COOKIE_MAX_AGE, + createSessionToken, +} from './auth/session.utils'; @Controller() export class AppController { - constructor(private readonly appService: AppService) {} + private readonly authCfg: AuthConfig; + constructor( + private readonly appService: AppService, + private readonly configService: ConfigService, + ) { + this.authCfg = this.configService.get('auth')!; + } + + @SkipThrottle() @Get('/health') health() { return this.appService.getHealth(); } + + /** + * Auth validation endpoint for nginx auth_request. + * Returns 200 if authenticated, 401 otherwise. + * Used by nginx to protect /analytics/* routes. + * + * Note: SkipThrottle is required because nginx sends an auth_request + * for every resource loaded from /analytics/*, which can quickly + * exceed rate limits and cause 500 errors. + */ + @SkipThrottle() + @Get('/auth/validate') + validateAuth(@CurrentAuth() auth: AuthContext | null) { + if (!auth || !auth.isAuthenticated) { + throw new UnauthorizedException(); + } + return { valid: true }; + } + + /** + * Login endpoint for local auth. + * Validates Basic auth credentials and sets a session cookie. 
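+   *
+   * Example usage (a sketch; adjust the host and use the configured local admin
+   * credentials; the cookie jar keeps the session cookie for later requests):
+   *   curl -u admin:<password> -c cookies.txt -X POST http://localhost/api/v1/auth/login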
+ */ + @Public() + @Post('/auth/login') + login( + @Headers('authorization') authHeader: string | undefined, + @Res({ passthrough: true }) res: Response, + ) { + // Only for local auth provider + if (this.authCfg.provider !== 'local') { + throw new UnauthorizedException('Login endpoint only available for local auth'); + } + + // Validate Basic auth header + if (!authHeader || !authHeader.startsWith('Basic ')) { + throw new UnauthorizedException('Missing Basic Auth credentials'); + } + + const base64Credentials = authHeader.slice(6); + let username: string; + let password: string; + + try { + const credentials = Buffer.from(base64Credentials, 'base64').toString('utf-8'); + [username, password] = credentials.split(':'); + } catch { + throw new UnauthorizedException('Invalid Basic Auth format'); + } + + if (!username || !password) { + throw new UnauthorizedException('Invalid Basic Auth format'); + } + + // Validate credentials + if ( + username !== this.authCfg.local.adminUsername || + password !== this.authCfg.local.adminPassword + ) { + throw new UnauthorizedException('Invalid admin credentials'); + } + + // Create session token and set cookie + const sessionToken = createSessionToken(username); + + res.cookie(SESSION_COOKIE_NAME, sessionToken, { + httpOnly: true, + secure: process.env.NODE_ENV === 'production', + sameSite: 'lax', + maxAge: SESSION_COOKIE_MAX_AGE, + path: '/', + }); + + return { success: true, message: 'Logged in successfully' }; + } + + /** + * Logout endpoint for local auth. + * Clears the session cookie. + */ + @Public() + @Post('/auth/logout') + logout(@Res({ passthrough: true }) res: Response) { + res.clearCookie(SESSION_COOKIE_NAME, { path: '/' }); + return { success: true, message: 'Logged out successfully' }; + } } diff --git a/backend/src/app.module.ts b/backend/src/app.module.ts index e08627cc..db8bd1b9 100644 --- a/backend/src/app.module.ts +++ b/backend/src/app.module.ts @@ -1,10 +1,15 @@ import { Module } from '@nestjs/common'; import { APP_GUARD } from '@nestjs/core'; import { ConfigModule } from '@nestjs/config'; +import { ThrottlerModule, ThrottlerGuard, seconds } from '@nestjs/throttler'; +import { ThrottlerStorageRedisService } from '@nest-lab/throttler-storage-redis'; +import Redis from 'ioredis'; import { AppController } from './app.controller'; import { AppService } from './app.service'; import { authConfig } from './config/auth.config'; +import { opensearchConfig } from './config/opensearch.config'; +import { OpenSearchModule } from './config/opensearch.module'; import { AgentsModule } from './agents/agents.module'; import { AuthModule } from './auth/auth.module'; import { AuthGuard } from './auth/auth.guard'; @@ -46,8 +51,25 @@ const testingModules = process.env.NODE_ENV === 'production' ? [] : [TestingSupp ConfigModule.forRoot({ isGlobal: true, envFilePath: ['.env', '../.env'], - load: [authConfig], + load: [authConfig, opensearchConfig], }), + ThrottlerModule.forRootAsync({ + useFactory: () => { + const redisUrl = process.env.REDIS_URL; + + return { + throttlers: [ + { + name: 'default', + ttl: seconds(60), // 60 seconds + limit: 100, // 100 requests per minute + }, + ], + storage: redisUrl ? new ThrottlerStorageRedisService(new Redis(redisUrl)) : undefined, // Falls back to in-memory storage if Redis not configured + }; + }, + }), + OpenSearchModule, ...coreModules, ...testingModules, ], @@ -62,6 +84,10 @@ const testingModules = process.env.NODE_ENV === 'production' ? 
[] : [TestingSupp provide: APP_GUARD, useClass: RolesGuard, }, + { + provide: APP_GUARD, + useClass: ThrottlerGuard, + }, ], }) export class AppModule {} diff --git a/backend/src/auth/providers/local-auth.provider.ts b/backend/src/auth/providers/local-auth.provider.ts index f4adadea..2e2e14d4 100644 --- a/backend/src/auth/providers/local-auth.provider.ts +++ b/backend/src/auth/providers/local-auth.provider.ts @@ -1,10 +1,11 @@ import type { Request } from 'express'; -import { Injectable, UnauthorizedException } from '@nestjs/common'; +import { Injectable, UnauthorizedException, Logger } from '@nestjs/common'; import type { LocalAuthConfig } from '../../config/auth.config'; import { DEFAULT_ROLES, type AuthContext } from '../types'; import type { AuthProviderStrategy } from './auth-provider.interface'; import { DEFAULT_ORGANIZATION_ID } from '../constants'; +import { verifySessionToken, SESSION_COOKIE_NAME } from '../session.utils'; function extractBasicAuth( headerValue: string | undefined, @@ -28,6 +29,7 @@ function extractBasicAuth( @Injectable() export class LocalAuthProvider implements AuthProviderStrategy { readonly name = 'local'; + private readonly logger = new Logger(LocalAuthProvider.name); constructor(private readonly config: LocalAuthConfig) {} @@ -35,16 +37,36 @@ export class LocalAuthProvider implements AuthProviderStrategy { // Always use local-dev org ID for local auth const orgId = DEFAULT_ORGANIZATION_ID; - // Require Basic Auth (admin credentials) + // Check config if (!this.config.adminUsername || !this.config.adminPassword) { throw new UnauthorizedException('Local auth not configured - admin credentials required'); } + // Try session cookie first (for browser navigation requests like /analytics/) + const sessionCookie = request.cookies?.[SESSION_COOKIE_NAME]; + if (sessionCookie) { + const session = verifySessionToken(sessionCookie); + if (session && session.username === this.config.adminUsername) { + this.logger.debug(`Session cookie auth successful for user: ${session.username}`); + return { + userId: 'admin', + organizationId: orgId, + roles: DEFAULT_ROLES, + isAuthenticated: true, + provider: this.name, + }; + } + this.logger.debug('Session cookie invalid or username mismatch'); + } + + // Fall back to Basic Auth (for API requests) const authHeader = request.headers.authorization; const basicAuth = extractBasicAuth(authHeader); if (!basicAuth) { - throw new UnauthorizedException('Missing Basic Auth credentials'); + throw new UnauthorizedException( + 'Missing authentication - provide session cookie or Basic Auth', + ); } if ( diff --git a/backend/src/auth/session.utils.ts b/backend/src/auth/session.utils.ts new file mode 100644 index 00000000..8e3864fd --- /dev/null +++ b/backend/src/auth/session.utils.ts @@ -0,0 +1,48 @@ +import * as crypto from 'crypto'; + +// Session cookie configuration +export const SESSION_COOKIE_NAME = 'shipsec_session'; +export const SESSION_COOKIE_MAX_AGE = 7 * 24 * 60 * 60 * 1000; // 7 days + +// Secret for signing session tokens (use env var in production) +const SESSION_SECRET = process.env.SESSION_SECRET || 'local-dev-session-secret'; + +export interface SessionPayload { + username: string; + ts: number; +} + +/** + * Create a signed session token for local auth. 
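+ *
+ * Token format (matches the implementation below): base64 of
+ * `<JSON payload>.<hex HMAC-SHA256 signature>`, where the payload is
+ * `{"username": "...", "ts": <epoch ms>}`.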
+ */ +export function createSessionToken(username: string): string { + const payload = JSON.stringify({ username, ts: Date.now() }); + const hmac = crypto.createHmac('sha256', SESSION_SECRET); + hmac.update(payload); + const signature = hmac.digest('hex'); + return Buffer.from(`${payload}.${signature}`).toString('base64'); +} + +/** + * Verify and decode a session token. + */ +export function verifySessionToken(token: string): SessionPayload | null { + try { + const decoded = Buffer.from(token, 'base64').toString('utf-8'); + const lastDot = decoded.lastIndexOf('.'); + if (lastDot === -1) return null; + + const payload = decoded.slice(0, lastDot); + const signature = decoded.slice(lastDot + 1); + + const hmac = crypto.createHmac('sha256', SESSION_SECRET); + hmac.update(payload); + const expectedSignature = hmac.digest('hex'); + + if (signature !== expectedSignature) return null; + + return JSON.parse(payload); + } catch { + return null; + } +} diff --git a/backend/src/config/opensearch.client.ts b/backend/src/config/opensearch.client.ts new file mode 100644 index 00000000..bed76f8f --- /dev/null +++ b/backend/src/config/opensearch.client.ts @@ -0,0 +1,53 @@ +import { Client } from '@opensearch-project/opensearch'; +import { Injectable, Logger, OnModuleInit } from '@nestjs/common'; +import { ConfigService } from '@nestjs/config'; + +@Injectable() +export class OpenSearchClient implements OnModuleInit { + private readonly logger = new Logger(OpenSearchClient.name); + private client: Client | null = null; + private isEnabled = false; + + constructor(private readonly configService: ConfigService) {} + + onModuleInit() { + this.initializeClient(); + } + + private initializeClient() { + const url = this.configService.get('opensearch.url'); + const username = this.configService.get('opensearch.username'); + const password = this.configService.get('opensearch.password'); + + if (!url) { + this.logger.warn( + '🔍 OpenSearch client not configured - OPENSEARCH_URL not set. Security analytics indexing disabled.', + ); + return; + } + + try { + this.client = new Client({ + node: url, + auth: username && password ? { username, password } : undefined, + ssl: { + rejectUnauthorized: process.env.NODE_ENV === 'production', + }, + }); + + this.isEnabled = true; + this.logger.log(`🔍 OpenSearch client initialized - Connected to ${url}`); + } catch (error) { + this.logger.error(`Failed to initialize OpenSearch client: ${error}`); + this.isEnabled = false; + } + } + + getClient(): Client | null { + return this.client; + } + + isClientEnabled(): boolean { + return this.isEnabled && this.client !== null; + } +} diff --git a/backend/src/config/opensearch.config.ts b/backend/src/config/opensearch.config.ts new file mode 100644 index 00000000..b031ad3d --- /dev/null +++ b/backend/src/config/opensearch.config.ts @@ -0,0 +1,13 @@ +import { registerAs } from '@nestjs/config'; + +export interface OpenSearchConfig { + url: string | null; + username: string | null; + password: string | null; +} + +export const opensearchConfig = registerAs('opensearch', () => ({ + url: process.env.OPENSEARCH_URL ?? null, + username: process.env.OPENSEARCH_USERNAME ?? null, + password: process.env.OPENSEARCH_PASSWORD ?? 
null, +})); diff --git a/backend/src/config/opensearch.module.ts b/backend/src/config/opensearch.module.ts new file mode 100644 index 00000000..b4db4b84 --- /dev/null +++ b/backend/src/config/opensearch.module.ts @@ -0,0 +1,9 @@ +import { Module, Global } from '@nestjs/common'; +import { OpenSearchClient } from './opensearch.client'; + +@Global() +@Module({ + providers: [OpenSearchClient], + exports: [OpenSearchClient], +}) +export class OpenSearchModule {} diff --git a/backend/src/database/migration.guard.ts b/backend/src/database/migration.guard.ts index e321a2db..773260b1 100644 --- a/backend/src/database/migration.guard.ts +++ b/backend/src/database/migration.guard.ts @@ -8,6 +8,7 @@ const REQUIRED_TABLES = [ 'artifacts', 'workflow_log_streams', 'workflow_traces', + 'organization_settings', ]; @Injectable() diff --git a/backend/src/database/schema/index.ts b/backend/src/database/schema/index.ts index c92589dc..9b432de7 100644 --- a/backend/src/database/schema/index.ts +++ b/backend/src/database/schema/index.ts @@ -17,3 +17,4 @@ export * from './webhooks'; export * from './terminal-records'; export * from './agent-trace-events'; export * from './node-io'; +export * from './organization-settings'; diff --git a/backend/src/database/schema/organization-settings.ts b/backend/src/database/schema/organization-settings.ts new file mode 100644 index 00000000..b6dd7f14 --- /dev/null +++ b/backend/src/database/schema/organization-settings.ts @@ -0,0 +1,17 @@ +import { integer, pgTable, timestamp, varchar } from 'drizzle-orm/pg-core'; + +export type SubscriptionTier = 'free' | 'pro' | 'enterprise'; + +export const organizationSettingsTable = pgTable('organization_settings', { + organizationId: varchar('organization_id', { length: 191 }).primaryKey(), + subscriptionTier: varchar('subscription_tier', { length: 50 }) + .$type() + .notNull() + .default('free'), + analyticsRetentionDays: integer('analytics_retention_days').notNull().default(30), + createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(), + updatedAt: timestamp('updated_at', { withTimezone: true }).defaultNow().notNull(), +}); + +export type OrganizationSettings = typeof organizationSettingsTable.$inferSelect; +export type NewOrganizationSettings = typeof organizationSettingsTable.$inferInsert; diff --git a/backend/src/dsl/validator.ts b/backend/src/dsl/validator.ts index 3f888ead..2c486039 100644 --- a/backend/src/dsl/validator.ts +++ b/backend/src/dsl/validator.ts @@ -170,6 +170,10 @@ function isPlaceholderIssue(issue: ZodIssue, placeholderFields: Set): bo return true; case 'too_big': return true; + case 'invalid_value': + // Enum/literal validation fails on placeholder objects with missing fields + // The actual value from upstream will have the correct enum value at runtime + return true; case 'custom': // Custom validations (from .refine()) fail on placeholders but will pass at runtime // when the actual value comes from the connected edge diff --git a/backend/src/main.ts b/backend/src/main.ts index 9471c461..a464c4cc 100644 --- a/backend/src/main.ts +++ b/backend/src/main.ts @@ -3,6 +3,7 @@ import 'reflect-metadata'; import { NestFactory } from '@nestjs/core'; import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger'; import { cleanupOpenApiDoc } from 'nestjs-zod'; +import cookieParser from 'cookie-parser'; import { isVersionCheckDisabled, performVersionCheck } from './version-check'; @@ -14,6 +15,9 @@ async function bootstrap() { logger: ['log', 'error', 'warn'], }); + // Enable cookie parsing for 
session auth + app.use(cookieParser()); + // Set global prefix for all routes app.setGlobalPrefix('api/v1'); @@ -29,6 +33,7 @@ async function bootstrap() { app.enableCors({ origin: [ 'http://localhost', + 'http://localhost:80', 'http://localhost:5173', 'http://localhost:5174', 'http://localhost:3211', @@ -43,6 +48,9 @@ async function bootstrap() { 'Accept', 'Cache-Control', 'x-organization-id', + 'X-Real-IP', + 'X-Forwarded-For', + 'X-Forwarded-Proto', ], }); const port = Number(process.env.PORT ?? 3211); diff --git a/bun.lock b/bun.lock index dc70ede1..29ed300a 100644 --- a/bun.lock +++ b/bun.lock @@ -29,11 +29,14 @@ "@clerk/backend": "^2.9.4", "@clerk/types": "^4.81.0", "@grpc/grpc-js": "^1.14.0", + "@nest-lab/throttler-storage-redis": "^1.1.0", "@nestjs/common": "^10.4.0", "@nestjs/config": "^3.2.0", "@nestjs/core": "^10.4.0", "@nestjs/platform-express": "^10.4.0", "@nestjs/swagger": "^11.2.0", + "@nestjs/throttler": "^6.5.0", + "@opensearch-project/opensearch": "^3.5.1", "@shipsec/component-sdk": "workspace:*", "@shipsec/shared": "workspace:*", "@shipsec/studio-worker": "workspace:*", @@ -65,6 +68,7 @@ "@eslint/js": "^9.39.2", "@nestjs/testing": "^10.4.0", "@types/bcryptjs": "^3.0.0", + "@types/cookie-parser": "^1.4.10", "@types/express-serve-static-core": "^4.19.6", "@types/har-format": "^1.2.16", "@types/multer": "^2.0.0", @@ -239,6 +243,7 @@ "@googleapis/admin": "^29.0.0", "@grpc/grpc-js": "^1.14.0", "@okta/okta-sdk-nodejs": "^7.3.0", + "@opensearch-project/opensearch": "^3.5.1", "@shipsec/component-sdk": "*", "@shipsec/contracts": "*", "@shipsec/shared": "*", @@ -604,6 +609,8 @@ "@monaco-editor/react": ["@monaco-editor/react@4.7.0", "", { "dependencies": { "@monaco-editor/loader": "^1.5.0" }, "peerDependencies": { "monaco-editor": ">= 0.25.0 < 1", "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-cyzXQCtO47ydzxpQtCGSQGOC8Gk3ZUeBXFAxD+CWXYFo5OqZyZUonFl0DwUlTyAfRHntBfw2p3w4s9R6oe1eCA=="], + "@nest-lab/throttler-storage-redis": ["@nest-lab/throttler-storage-redis@1.1.0", "", { "dependencies": { "tslib": "^2.3.0" }, "peerDependencies": { "@nestjs/common": "^7.0.0 || ^8.0.0 || ^9.0.0 || ^10.0.0 || ^11.0.0", "@nestjs/core": "^7.0.0 || ^8.0.0 || ^9.0.0 || ^10.0.0 || ^11.0.0", "@nestjs/throttler": ">=6.0.0", "ioredis": ">=5.0.0", "reflect-metadata": "^0.2.1" } }, "sha512-7DW8MuqoB+ubu8cWby9Vw56eAFqsHFfowEflHbmmAF2sNByRdzcR4ddcyoYLwL3zG53nLmvzUa4EXoHKB4RoaQ=="], + "@nestjs/common": ["@nestjs/common@10.4.21", "", { "dependencies": { "file-type": "20.4.1", "iterare": "1.2.1", "tslib": "2.8.1", "uid": "2.0.2" }, "peerDependencies": { "class-transformer": "*", "class-validator": "*", "reflect-metadata": "^0.1.12 || ^0.2.0", "rxjs": "^7.1.0" }, "optionalPeers": ["class-transformer", "class-validator"] }, "sha512-2nabPCrq6HAc6PlZQsdDaV16ur7rs8Z8SH/rewS0SqbrvV6hgC/D5IPjVt4NvX7UjWKapqq+bymicuiZjP5WlQ=="], "@nestjs/config": ["@nestjs/config@3.3.0", "", { "dependencies": { "dotenv": "16.4.5", "dotenv-expand": "10.0.0", "lodash": "4.17.21" }, "peerDependencies": { "@nestjs/common": "^8.0.0 || ^9.0.0 || ^10.0.0", "rxjs": "^7.1.0" } }, "sha512-pdGTp8m9d0ZCrjTpjkUbZx6gyf2IKf+7zlkrPNMsJzYZ4bFRRTpXrnj+556/5uiI6AfL5mMrJc2u7dB6bvM+VA=="], @@ -618,6 +625,8 @@ "@nestjs/testing": ["@nestjs/testing@10.4.21", "", { "dependencies": { "tslib": "2.8.1" }, "peerDependencies": { "@nestjs/common": "^10.0.0", "@nestjs/core": "^10.0.0", "@nestjs/microservices": "^10.0.0", "@nestjs/platform-express": "^10.0.0" }, "optionalPeers": 
["@nestjs/microservices", "@nestjs/platform-express"] }, "sha512-mQyJvrJ4mA9nukx+zXafh0iLtbGmwalnGWdoTih6cKtANGewIVsqgfSpuxwzyR4d42uc5jqgBulEZszmUFQ/5A=="], + "@nestjs/throttler": ["@nestjs/throttler@6.5.0", "", { "peerDependencies": { "@nestjs/common": "^7.0.0 || ^8.0.0 || ^9.0.0 || ^10.0.0 || ^11.0.0", "@nestjs/core": "^7.0.0 || ^8.0.0 || ^9.0.0 || ^10.0.0 || ^11.0.0", "reflect-metadata": "^0.1.13 || ^0.2.0" } }, "sha512-9j0ZRfH0QE1qyrj9JjIRDz5gQLPqq9yVC2nHsrosDVAfI5HHw08/aUAWx9DZLSdQf4HDkmhTTEGLrRFHENvchQ=="], + "@noble/hashes": ["@noble/hashes@1.8.0", "", {}, "sha512-jCs9ldd7NwzpgXDIf6P3+NrHh9/sD6CQdxHyjQI+h/6rDNo88ypBxxz45UDuZHz9r3tNz7N/VInSVoVdtXEI4A=="], "@nodelib/fs.scandir": ["@nodelib/fs.scandir@2.1.5", "", { "dependencies": { "@nodelib/fs.stat": "2.0.5", "run-parallel": "^1.1.9" } }, "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g=="], @@ -630,6 +639,8 @@ "@okta/okta-sdk-nodejs": ["@okta/okta-sdk-nodejs@7.3.0", "", { "dependencies": { "@types/node-forge": "^1.3.1", "deep-copy": "^1.4.2", "eckles": "^1.4.1", "form-data": "^4.0.4", "https-proxy-agent": "^5.0.0", "js-yaml": "^4.1.0", "lodash": "^4.17.20", "njwt": "^2.0.1", "node-fetch": "^2.6.7", "node-jose": "^2.2.0", "parse-link-header": "^2.0.0", "rasha": "^1.2.5", "safe-flat": "^2.0.2", "url-parse": "^1.5.10", "uuid": "^11.1.0" } }, "sha512-6J3VV+8fBOqIXDqb3t2sBeXj1WOEZL6wP2AcGRzvMRMb2WL7JKR6ZDrt/1Kk7j4seXCKMpZrHsPYYdfRXwkSKQ=="], + "@opensearch-project/opensearch": ["@opensearch-project/opensearch@3.5.1", "", { "dependencies": { "aws4": "^1.11.0", "debug": "^4.3.1", "hpagent": "^1.2.0", "json11": "^2.0.0", "ms": "^2.1.3", "secure-json-parse": "^2.4.0" } }, "sha512-6bf+HcuERzAtHZxrm6phjref54ABse39BpkDie/YO3AUFMCBrb3SK5okKSdT5n3+nDRuEEQLhQCl0RQV3s1qpA=="], + "@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="], "@paralleldrive/cuid2": ["@paralleldrive/cuid2@2.3.1", "", { "dependencies": { "@noble/hashes": "^1.1.5" } }, "sha512-XO7cAxhnTZl0Yggq6jOgjiOHhbgcO4NqFqwSmQpjK3b6TEE6Uj/jfSk6wzYyemh3+I0sHirKSetjQwn5cZktFw=="], @@ -1026,6 +1037,8 @@ "@types/connect": ["@types/connect@3.4.38", "", { "dependencies": { "@types/node": "*" } }, "sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug=="], + "@types/cookie-parser": ["@types/cookie-parser@1.4.10", "", { "peerDependencies": { "@types/express": "*" } }, "sha512-B4xqkqfZ8Wek+rCOeRxsjMS9OgvzebEzzLYw7NHYuvzb7IdxOkI0ZHGgeEBX4PUM7QGVvNSK60T3OvWj3YfBRg=="], + "@types/cookiejar": ["@types/cookiejar@2.1.5", "", {}, "sha512-he+DHOWReW0nghN24E1WUqM0efK4kI9oTqDm6XmK8ZPe2djZ90BSNdGnIyCLzCPw7/pogPlGbzI2wHGGmi4O/Q=="], "@types/d3": ["@types/d3@7.4.3", "", { "dependencies": { "@types/d3-array": "*", "@types/d3-axis": "*", "@types/d3-brush": "*", "@types/d3-chord": "*", "@types/d3-color": "*", "@types/d3-contour": "*", "@types/d3-delaunay": "*", "@types/d3-dispatch": "*", "@types/d3-drag": "*", "@types/d3-dsv": "*", "@types/d3-ease": "*", "@types/d3-fetch": "*", "@types/d3-force": "*", "@types/d3-format": "*", "@types/d3-geo": "*", "@types/d3-hierarchy": "*", "@types/d3-interpolate": "*", "@types/d3-path": "*", "@types/d3-polygon": "*", "@types/d3-quadtree": "*", "@types/d3-random": "*", "@types/d3-scale": "*", "@types/d3-scale-chromatic": "*", "@types/d3-selection": "*", "@types/d3-shape": "*", "@types/d3-time": "*", "@types/d3-time-format": "*", "@types/d3-timer": "*", "@types/d3-transition": "*", 
"@types/d3-zoom": "*" } }, "sha512-lZXZ9ckh5R8uiFVt8ogUNf+pIrK4EsWrx2Np75WvF/eTpJ0FMHNhjXk8CKEx/+gpHbNQyJWehbFaTvqmHWB3ww=="], @@ -1318,6 +1331,8 @@ "available-typed-arrays": ["available-typed-arrays@1.0.7", "", { "dependencies": { "possible-typed-array-names": "^1.0.0" } }, "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ=="], + "aws4": ["aws4@1.13.2", "", {}, "sha512-lHe62zvbTB5eEABUVi/AwVh0ZKY9rMMDhmm+eeyuuUQbQ3+J+fONVQOZyj+DdrvD4BY33uYniyRJ4UJIaSKAfw=="], + "bail": ["bail@2.0.2", "", {}, "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw=="], "balanced-match": ["balanced-match@1.0.2", "", {}, "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="], @@ -1854,6 +1869,8 @@ "hosted-git-info": ["hosted-git-info@4.1.0", "", { "dependencies": { "lru-cache": "^6.0.0" } }, "sha512-kyCuEOWjJqZuDbRHzL8V93NzQhwIB71oFWSyzVo+KPZI+pnQPPxucdkrOZvkLRnrf5URsQM+IJ09Dw29cRALIA=="], + "hpagent": ["hpagent@1.2.0", "", {}, "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA=="], + "html-encoding-sniffer": ["html-encoding-sniffer@6.0.0", "", { "dependencies": { "@exodus/bytes": "^1.6.0" } }, "sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg=="], "html-url-attributes": ["html-url-attributes@3.0.1", "", {}, "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ=="], @@ -2008,6 +2025,8 @@ "json-stringify-safe": ["json-stringify-safe@5.0.1", "", {}, "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="], + "json11": ["json11@2.0.2", "", { "bin": { "json11": "dist/cli.mjs" } }, "sha512-HIrd50UPYmP6sqLuLbFVm75g16o0oZrVfxrsY0EEys22klz8mRoWlX9KAEDOSOR9Q34rcxsyC8oDveGrCz5uLQ=="], + "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], "jsx-ast-utils": ["jsx-ast-utils@3.3.5", "", { "dependencies": { "array-includes": "^3.1.6", "array.prototype.flat": "^1.3.1", "object.assign": "^4.1.4", "object.values": "^1.1.6" } }, "sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ=="], @@ -2208,7 +2227,7 @@ "monaco-editor": ["monaco-editor@0.55.1", "", { "dependencies": { "dompurify": "3.2.7", "marked": "14.0.0" } }, "sha512-jz4x+TJNFHwHtwuV9vA9rMujcZRb0CEilTEwG2rRSpe/A7Jdkuj8xPKttCgOh+v/lkHy7HsZ64oj+q3xoAFl9A=="], - "ms": ["ms@3.0.0-canary.1", "", {}, "sha512-kh8ARjh8rMN7Du2igDRO9QJnqCb2xYTJxyQYK7vJJS4TvLLmsbyhiKpSW+t+y26gyOyMd0riphX0GeWKU3ky5g=="], + "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], "multer": ["multer@2.0.2", "", { "dependencies": { "append-field": "^1.0.0", "busboy": "^1.6.0", "concat-stream": "^2.0.0", "mkdirp": "^0.5.6", "object-assign": "^4.1.1", "type-is": "^1.6.18", "xtend": "^4.0.2" } }, "sha512-u7f2xaZ/UG8oLXHvtF/oWTRvT44p9ecwBBqTwgJVq0+4BW1g8OW01TyMEGWBHbyMOYVHXslaut7qEQ1meATXgw=="], @@ -2558,6 +2577,8 @@ "schema-utils": ["schema-utils@4.3.3", "", { "dependencies": { "@types/json-schema": "^7.0.9", "ajv": "^8.9.0", "ajv-formats": "^2.1.1", "ajv-keywords": "^5.1.0" } }, "sha512-eflK8wEtyOE6+hsaRVPxvUKYCpRgzLqDTb8krvAsRIwOGlHoSgYLgBXoubGgLd2fT41/OUYdb48v4k4WWHQurA=="], + "secure-json-parse": ["secure-json-parse@2.7.0", "", {}, 
"sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="], + "semver": ["semver@7.7.2", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA=="], "send": ["send@0.19.2", "", { "dependencies": { "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "fresh": "~0.5.2", "http-errors": "~2.0.1", "mime": "1.6.0", "ms": "2.1.3", "on-finished": "~2.4.1", "range-parser": "~1.2.1", "statuses": "~2.0.2" } }, "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg=="], @@ -3046,6 +3067,8 @@ "@shipsec/studio-worker/@types/node": ["@types/node@20.19.27", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-N2clP5pJhB2YnZJ3PIHFk5RkygRX5WO/5f0WC08tp0wd+sv0rsJk3MqWn3CbNmT2J505a5336jaQj4ph1AdMug=="], + "@temporalio/common/ms": ["ms@3.0.0-canary.1", "", {}, "sha512-kh8ARjh8rMN7Du2igDRO9QJnqCb2xYTJxyQYK7vJJS4TvLLmsbyhiKpSW+t+y26gyOyMd0riphX0GeWKU3ky5g=="], + "@temporalio/core-bridge/which": ["which@4.0.0", "", { "dependencies": { "isexe": "^3.1.1" }, "bin": { "node-which": "bin/which.js" } }, "sha512-GlaYyEb07DPxYCKhKzplCWBJtvxZcZMrL+4UkrTSJHHPyZU4mYYTv3qaOe77H7EODLSSopAUFAc6W8U4yqvscg=="], "@temporalio/worker/supports-color": ["supports-color@8.1.1", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q=="], @@ -3082,8 +3105,6 @@ "cli-tableau/chalk": ["chalk@3.0.0", "", { "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg=="], - "debug/ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - "decamelize-keys/decamelize": ["decamelize@1.2.0", "", {}, "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA=="], "decamelize-keys/map-obj": ["map-obj@1.0.1", "", {}, "sha512-7N/q3lyZ+LVCp7PzuxrJr4KMbBE2hW7BT7YNia330OFxIf4d3r5zVpicP2650l7CPN6RM9zOJRl3NGpqSiw3Eg=="], @@ -3206,8 +3227,6 @@ "send/mime": ["mime@1.6.0", "", { "bin": { "mime": "cli.js" } }, "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="], - "send/ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - "source-map-loader/iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], "source-map-support/source-map": ["source-map@0.6.1", "", {}, "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g=="], @@ -3302,16 +3321,10 @@ "@pm2/agent/chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], - "@pm2/agent/debug/ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - "@pm2/agent/semver/lru-cache": ["lru-cache@6.0.0", "", { "dependencies": { "yallist": "^4.0.0" } }, "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA=="], - "@pm2/io/debug/ms": ["ms@2.1.3", 
"", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - "@pm2/io/semver/lru-cache": ["lru-cache@6.0.0", "", { "dependencies": { "yallist": "^4.0.0" } }, "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA=="], - "@pm2/js-api/debug/ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - "@redocly/openapi-core/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="], "@shipsec/component-sdk/@types/node/undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="], @@ -3428,8 +3441,6 @@ "meow/redent/strip-indent": ["strip-indent@4.1.1", "", {}, "sha512-SlyRoSkdh1dYP0PzclLE7r0M9sgbFKKMFXpFRUMNuKhQSbC6VQIGzq3E0qsfvGJaUFJPGv6Ws1NZ/haTAjfbMA=="], - "needle/debug/ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - "node-fetch/whatwg-url/tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], "node-fetch/whatwg-url/webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], diff --git a/docs/analytics.md b/docs/analytics.md new file mode 100644 index 00000000..b432f4be --- /dev/null +++ b/docs/analytics.md @@ -0,0 +1,200 @@ +# Analytics Pipeline + +This document describes the analytics infrastructure for ShipSec Studio, including OpenSearch for data storage, OpenSearch Dashboards for visualization, and the routing architecture. + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Nginx (port 80) │ +│ │ +│ /analytics/* ──────► OpenSearch Dashboards (5601) │ +│ /api/* ──────► Backend API (3211) │ +│ /* ──────► Frontend SPA (8080) │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ Worker Service │ +│ │ +│ Analytics Sink Component ──────► OpenSearch (9200) │ +│ (OPENSEARCH_URL env var) │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## Components + +### OpenSearch (Port 9200) + +Time-series database for storing security findings and workflow analytics. + +**Configuration:** +- Single-node deployment (dev/simple prod) +- Security plugin disabled for development +- Index pattern: `security-findings-{org-id}-{date}` + +### OpenSearch Dashboards (Port 5601) + +Web UI for exploring and visualizing analytics data. + +**Configuration (`opensearch-dashboards.yml`):** +```yaml +server.basePath: "/analytics" +server.rewriteBasePath: true +opensearch.hosts: ["http://opensearch:9200"] +``` + +**Key Settings:** +- `basePath: "/analytics"` - All URLs are prefixed with `/analytics` +- `rewriteBasePath: true` - Strips `/analytics` from incoming requests, adds it back to responses + +### Analytics Sink (Worker Component) + +The `core.analytics.sink` component writes workflow results to OpenSearch. 
+ +**Environment Variable:** +```yaml +OPENSEARCH_URL=http://opensearch:9200 +``` + +**Document Structure:** +```json +{ + "@timestamp": "2026-01-25T01:22:43.783Z", + "title": "Finding title", + "severity": "high", + "description": "...", + "shipsec": { + "organization_id": "local-dev", + "run_id": "shipsec-run-xxx", + "workflow_id": "workflow-xxx", + "workflow_name": "My Workflow", + "component_id": "core.analytics.sink", + "node_ref": "analytics-sink-123" + } +} +``` + +## Nginx Routing + +All traffic flows through Nginx on port 80: + +| Path | Target | Description | +|------|--------|-------------| +| `/analytics/*` | `opensearch-dashboards:5601` | Analytics dashboard UI | +| `/api/*` | `backend:3211` | Backend REST API | +| `/*` | `frontend:8080` | Frontend SPA (catch-all) | + +### OpenSearch Dashboards Routing Details + +The `/analytics` route requires special handling: + +1. **Authentication**: Routes are protected - users must be logged in to access +2. **Session Cookies**: Backend validates session cookies for analytics route auth +3. **BasePath Configuration**: OpenSearch Dashboards is configured with `server.basePath: "/analytics"` +4. **Proxy Pass**: Nginx forwards requests to OpenSearch Dashboards without path rewriting +5. **rewriteBasePath**: OpenSearch Dashboards strips `/analytics` internally and adds it back to URLs + +```nginx +location /analytics/ { + proxy_pass http://opensearch-dashboards; + proxy_set_header osd-xsrf "true"; + proxy_cookie_path /analytics/ /analytics/; +} +``` + +## Frontend Integration + +The frontend links to OpenSearch Dashboards Discover app with pre-filtered queries: + +```typescript +const baseUrl = '/analytics'; +// Use .keyword fields for exact match filtering +const filterQuery = `shipsec.run_id.keyword:"${runId}"`; + +// Build Discover URL with proper state format +const gParam = encodeURIComponent('(time:(from:now-7d,to:now))'); +const aParam = encodeURIComponent( + `(columns:!(_source),index:'security-findings-*',interval:auto,query:(language:kuery,query:'${filterQuery}'),sort:!('@timestamp',desc))` +); +const url = `${baseUrl}/app/discover#/?_g=${gParam}&_a=${aParam}`; + +// Open in new tab +window.open(url, '_blank', 'noopener,noreferrer'); +``` + +**Key points:** +- Use `.keyword` fields (e.g., `shipsec.run_id.keyword`) for exact match filtering +- Use Discover app (`/app/discover`) for viewing raw data without saved views +- Include `index`, `columns`, `interval`, and `sort` in the `_a` param + +**Environment Variable:** +``` +VITE_OPENSEARCH_DASHBOARDS_URL=/analytics +``` + +## Data Flow + +1. **Workflow Execution**: Worker runs workflow with Analytics Sink component +2. **Data Enrichment**: Analytics Sink adds `shipsec.*` metadata fields +3. **Indexing**: Documents bulk-indexed to OpenSearch via `OPENSEARCH_URL` +4. **Visualization**: Users explore data in OpenSearch Dashboards at `/analytics` + +## Troubleshooting + +### Analytics Sink Not Writing Data + +**Symptom:** New workflow runs don't appear in OpenSearch + +**Check:** +```bash +# Verify worker has OPENSEARCH_URL set +docker exec shipsec-worker env | grep OPENSEARCH + +# Check worker logs for indexing errors +docker logs shipsec-worker 2>&1 | grep -i "analytics\|indexing" +``` + +**Solution:** Ensure `OPENSEARCH_URL=http://opensearch:9200` is set in worker environment. + +### OpenSearch Dashboards Shows Blank Page + +**Symptom:** Page loads but content area is empty + +**Check:** +1. Browser console for JavaScript errors +2. 
Time range filter (data might be outside selected range) +3. Index pattern selection + +**Solution:** +- Set time range to "Last 30 days" or wider +- Ensure `security-findings-*` index pattern is selected + +### Query Returns No Results + +**Check if data exists:** +```bash +# Count documents +curl -s "http://localhost:9200/security-findings-*/_count" | jq '.count' + +# List run_ids with data +curl -s "http://localhost:9200/security-findings-*/_search" \ + -H "Content-Type: application/json" \ + -d '{"size":0,"aggs":{"run_ids":{"terms":{"field":"shipsec.run_id.keyword"}}}}' \ + | jq '.aggregations.run_ids.buckets' +``` + +## Environment Variables + +| Variable | Service | Description | +|----------|---------|-------------| +| `OPENSEARCH_URL` | Worker | OpenSearch connection URL | +| `OPENSEARCH_USERNAME` | Worker | Optional: OpenSearch username | +| `OPENSEARCH_PASSWORD` | Worker | Optional: OpenSearch password | +| `VITE_OPENSEARCH_DASHBOARDS_URL` | Frontend | Dashboard URL for links | + +## See Also + +- [Docker README](../docker/README.md) - Docker deployment configurations +- [nginx.full.conf](../docker/nginx/nginx.full.conf) - Full stack nginx routing +- [opensearch-dashboards.yml](../docker/opensearch-dashboards.yml) - Dashboard configuration diff --git a/docs/components/core.mdx b/docs/components/core.mdx index 101e79e0..626698a8 100644 --- a/docs/components/core.mdx +++ b/docs/components/core.mdx @@ -205,3 +205,44 @@ Provides AWS credentials for S3 operations. | Output | Type | Description | |--------|------|-------------| | `credentials` | Object | Credential object for S3 components | + +--- + +## Analytics + +### Analytics Sink + +Indexes workflow output data into OpenSearch for analytics dashboards, queries, and alerts. Connect the `results` port from upstream security scanners. + +| Input | Type | Description | +|-------|------|-------------| +| `data` | Any | Data to index. Works best with `list` from scanner `results` ports. | + +| Output | Type | Description | +|--------|------|-------------| +| `indexed` | Boolean | Whether data was successfully indexed | +| `documentCount` | Number | Number of documents indexed | +| `indexName` | String | Name of the OpenSearch index used | + +| Parameter | Type | Description | +|-----------|------|-------------| +| `indexSuffix` | String | Custom suffix for the index name. Defaults to workflow slug. | +| `assetKeyField` | Select | Field to use as asset identifier. Options: auto, asset_key, host, domain, subdomain, url, ip, asset, target, custom | +| `customAssetKeyField` | String | Custom field name when assetKeyField is "custom" | +| `failOnError` | Boolean | When enabled, workflow stops if indexing fails. Default: false (fire-and-forget) | + +**How it works:** + +1. Each item in the input array becomes a separate document +2. Workflow context is added under `shipsec.*` namespace +3. Nested objects are serialized to JSON strings (prevents field explosion) +4. All documents get the same `@timestamp` + +**Example use cases:** +- Index Nuclei scan results for trend analysis +- Store TruffleHog secrets for tracking over time +- Aggregate vulnerability data across workflows + + + See [Workflow Analytics](/development/workflow-analytics) for detailed setup and querying guide. 
+ diff --git a/docs/development/analytics.mdx b/docs/development/analytics.mdx index 69ee9956..a4d91a3f 100644 --- a/docs/development/analytics.mdx +++ b/docs/development/analytics.mdx @@ -1,5 +1,5 @@ --- -title: "Analytics" +title: "Product Analytics (PostHog)" description: "PostHog integration for product analytics and session recording" --- diff --git a/docs/development/component-development.mdx b/docs/development/component-development.mdx index 853151f1..77151fdb 100644 --- a/docs/development/component-development.mdx +++ b/docs/development/component-development.mdx @@ -405,7 +405,166 @@ async execute({ inputs }, context) { --- +## Analytics Output Port (Results) +Security components should include a `results` output port for analytics integration. This port outputs structured findings that can be indexed into OpenSearch via the Analytics Sink. + +### Schema Requirements + +The `results` port must output `list` (array of records): + +```typescript +outputs: outputs({ + // ... other outputs ... + + results: port(z.array(z.record(z.string(), z.unknown())), { + label: 'Results', + description: + 'Analytics-ready findings array. Each item includes scanner name and asset key. Connect to Analytics Sink.', + connectionType: { kind: 'list', element: { kind: 'primitive', name: 'json' } }, + }), +}), +``` + +### Required Fields + +Each finding in the results array **must** include: + +| Field | Type | Description | +|-------|------|-------------| +| `scanner` | string | Scanner identifier (e.g., `'nuclei'`, `'trufflehog'`, `'supabase-scanner'`) | +| `asset_key` | string | Primary asset identifier (host, domain, target, etc.) | +| `finding_hash` | string | Stable hash for deduplication (16-char hex from SHA-256) | + +Additional fields from the scanner output should be spread into the finding object. + +### Finding Hash + +The `finding_hash` is a stable identifier that enables deduplication across workflow runs. It should be generated from the key identifying fields of each finding. + +**Purpose:** +- Track if a finding is **new** or **recurring** across scans +- Deduplicate findings in dashboards +- Calculate **first-seen** and **last-seen** timestamps +- Identify which findings have been **resolved** (no longer appearing) + +**How to generate:** + +Import from the component SDK: + +```typescript +import { generateFindingHash } from '@shipsec/component-sdk'; + +// Usage +const hash = generateFindingHash(finding.templateId, finding.host, finding.matchedAt); +``` + +**Key fields per scanner:** + +| Scanner | Fields Used | +|---------|-------------| +| Nuclei | `templateId + host + matchedAt` | +| TruffleHog | `DetectorType + Redacted + filePath` | +| Supabase Scanner | `check_id + projectRef + resource` | + +Choose fields that uniquely identify a finding but remain stable across runs (avoid timestamps, random IDs, etc.). + +### Example Implementation + +```typescript +import { generateFindingHash } from '@shipsec/component-sdk'; + +async execute({ inputs, params }, context) { + // ... run scanner and get findings ... + + // Build analytics-ready results with scanner metadata + const results: Record[] = findings.map((finding) => ({ + ...finding, // Spread all finding fields + scanner: 'my-scanner', // Scanner identifier + asset_key: finding.host ?? 
inputs.target, // Primary asset + finding_hash: generateFindingHash( // Stable deduplication hash + finding.ruleId, + finding.host, + finding.matchedAt + ), + })); + + return { + findings, // Original findings array + results, // Analytics-ready array for Analytics Sink + rawOutput, // Raw output for debugging + }; +} +``` + +### How It Works + +1. **Component outputs `results`**: Each scanner outputs its findings with `scanner` and `asset_key` fields +2. **Connect to Analytics Sink**: In the workflow canvas, connect the `results` port to Analytics Sink's `data` input +3. **Indexed to OpenSearch**: Each item in the array becomes a separate document with: + - Finding data at root level (nested objects serialized to JSON strings) + - Workflow context under `shipsec.*` namespace + - Consistent `@timestamp` for all findings in the batch + +### Document Structure in OpenSearch + +```json +{ + "check_id": "DB_RLS_DISABLED", + "severity": "CRITICAL", + "title": "RLS Disabled", + "metadata": "{\"table\":\"users\"}", + "scanner": "supabase-scanner", + "asset_key": "abc123xyz", + "finding_hash": "a1b2c3d4e5f67890", + "shipsec": { + "organization_id": "org_123", + "run_id": "run_abc123", + "workflow_id": "wf_xyz789", + "workflow_name": "Supabase Security Audit", + "component_id": "core.analytics.sink", + "node_ref": "analytics-sink-1" + }, + "@timestamp": "2024-01-21T10:30:00Z" +} +``` + +### `shipsec` Context Fields + +The Analytics Sink automatically adds workflow context under the `shipsec` namespace: + +| Field | Description | +|-------|-------------| +| `organization_id` | Organization that owns the workflow | +| `run_id` | Unique identifier for this workflow execution | +| `workflow_id` | ID of the workflow definition | +| `workflow_name` | Human-readable workflow name | +| `component_id` | Component type (e.g., `core.analytics.sink`) | +| `node_ref` | Node reference in the workflow graph | +| `asset_key` | Auto-detected or specified asset identifier | + +### Example Queries + +``` +# Find all findings for an asset +asset_key: "api.example.com" + +# Find new findings (first seen today) +finding_hash: X AND @timestamp: [now-1d TO now] AND NOT (finding_hash: X AND @timestamp: [* TO now-1d]) + +# All findings from a specific workflow run +shipsec.run_id: "run_abc123" + +# Aggregate findings by scanner +scanner: * | stats count() by scanner + +# Track recurring findings +finding_hash: "a1b2c3d4" | sort @timestamp +``` + + + Nested objects in findings are automatically serialized to JSON strings to prevent OpenSearch field explosion (1000 field limit). + --- diff --git a/docs/development/workflow-analytics.mdx b/docs/development/workflow-analytics.mdx new file mode 100644 index 00000000..2014e12b --- /dev/null +++ b/docs/development/workflow-analytics.mdx @@ -0,0 +1,353 @@ +--- +title: "Workflow Analytics" +description: "Index security findings into OpenSearch for dashboards, queries, and alerting" +--- + +ShipSec Studio includes a workflow analytics system that indexes security findings into OpenSearch. This enables real-time dashboards, historical trend analysis, and alerting on security data. + +--- + +## Overview + +The analytics system consists of: + +1. **Analytics Sink component** - Indexes workflow output data into OpenSearch +2. **Results output port** - Structured findings from security scanners +3. **OpenSearch storage** - Time-series index for querying and visualization +4. 
**View Analytics button** - Quick access to filtered dashboards + +--- + +## Architecture + +```mermaid +flowchart LR + subgraph Scanners + N[Nuclei Scan] + T[TruffleHog] + S[Supabase Scanner] + end + + AS[Analytics Sink] + OS[(OpenSearch)] + + subgraph Dashboard[OpenSearch Dashboards] + V[Visualizations] + A[Alerts] + Q[Queries] + end + + N -->|results port| AS + T -->|results port| AS + S -->|results port| AS + AS --> OS + OS --> Dashboard +``` + +Each scanner outputs findings through its `results` port, which connects to the Analytics Sink. The sink indexes each finding as a separate document with workflow metadata. + +--- + +## Document Structure + +Indexed documents follow this structure: + +```json +{ + "check_id": "DB_RLS_DISABLED", + "severity": "CRITICAL", + "title": "RLS Disabled on Table: users", + "resource": "public.users", + "metadata": "{\"schema\":\"public\",\"table\":\"users\"}", + "scanner": "supabase-scanner", + "asset_key": "abcdefghij1234567890", + "finding_hash": "a1b2c3d4e5f67890", + "shipsec": { + "organization_id": "org_123", + "run_id": "shipsec-run-xxx", + "workflow_id": "d1d33161-929f-4af4-9a64-xxx", + "workflow_name": "Supabase Security Audit", + "component_id": "core.analytics.sink", + "node_ref": "analytics-sink-1" + }, + "@timestamp": "2025-01-21T10:30:00.000Z" +} +``` + +### Field Categories + +| Category | Fields | Description | +|----------|--------|-------------| +| Finding data | `check_id`, `severity`, `title`, etc. | Scanner-specific fields at root level | +| Asset tracking | `scanner`, `asset_key`, `finding_hash` | Required fields for analytics | +| Workflow context | `shipsec.*` | Automatic metadata from the workflow | +| Timestamp | `@timestamp` | Indexing timestamp | + + + Nested objects in findings are automatically serialized to JSON strings to prevent OpenSearch field explosion (1000 field limit). + + +--- + +## `shipsec` context fields + +The Analytics Sink automatically adds workflow context under the `shipsec` namespace: + +| Field | Description | +|-------|-------------| +| `organization_id` | Organization that owns the workflow | +| `run_id` | Unique identifier for this workflow execution | +| `workflow_id` | ID of the workflow definition | +| `workflow_name` | Human-readable workflow name | +| `component_id` | Component type (always `core.analytics.sink`) | +| `node_ref` | Node reference in the workflow graph | +| `asset_key` | Auto-detected or specified asset identifier | + +--- + +## Finding Hash for Deduplication + +The `finding_hash` is a stable 16-character identifier that enables tracking findings across workflow runs. + +### Purpose + +- **New vs recurring**: Determine if a finding appeared before +- **First-seen / last-seen**: Track when findings were first and last detected +- **Resolution tracking**: Findings that stop appearing may be resolved +- **Deduplication**: Remove duplicates in dashboards across runs + +### Generation + +Each scanner generates the hash from key identifying fields: + +| Scanner | Hash Fields | +|---------|-------------| +| Nuclei | `templateId + host + matchedAt` | +| TruffleHog | `DetectorType + Redacted + filePath` | +| Supabase Scanner | `check_id + projectRef + resource` | + +Fields are normalized (lowercase, trimmed) and hashed with SHA-256, truncated to 16 hex characters. 
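+
+For reference, a minimal sketch of the scheme (the real helper is `generateFindingHash`
+from `@shipsec/component-sdk`; this standalone version only illustrates the normalize,
+hash, and truncate steps, and the `|` separator is an assumption):
+
+```typescript
+import { createHash } from 'crypto';
+
+// Illustrative only: normalize each identifying field, join, hash with SHA-256,
+// and keep the first 16 hex characters as the stable finding hash.
+function findingHashSketch(...fields: Array<string | undefined>): string {
+  const normalized = fields.map((f) => (f ?? '').trim().toLowerCase()).join('|');
+  return createHash('sha256').update(normalized).digest('hex').slice(0, 16);
+}
+
+// e.g. a Nuclei finding: templateId + host + matchedAt
+// findingHashSketch('http-missing-security-headers', 'api.example.com', 'https://api.example.com/');
+```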
+ +--- + +## Querying Data + +### Basic Queries (KQL) + +``` +# Find all findings for an asset +asset_key: "api.example.com" + +# Filter by severity +severity: "CRITICAL" OR severity: "HIGH" + +# Filter by scanner +scanner: "nuclei" + +# All findings from a specific workflow run +shipsec.run_id: "shipsec-run-abc123" + +# Filter by organization +shipsec.organization_id: "org_123" + +# Filter by workflow +shipsec.workflow_id: "d1d33161-929f-4af4-9a64-xxx" +``` + +### Tracking Findings Over Time + +``` +# Track a specific finding across runs +finding_hash: "a1b2c3d4e5f67890" + +# Find recurring findings (multiple runs) +# Use aggregation: group by finding_hash, count occurrences +``` + +### Common Aggregations + +| Aggregation | Use Case | +|-------------|----------| +| `terms` on `severity` | Count findings by severity | +| `terms` on `scanner` | Count findings by scanner | +| `terms` on `asset_key` | Most vulnerable assets | +| `date_histogram` on `@timestamp` | Findings over time | +| `cardinality` on `finding_hash` | Unique findings count | + +--- + +## Setting Up OpenSearch + +### Environment Variables + +Set these in your `worker/.env`: + +```bash +OPENSEARCH_URL=http://localhost:9200 +OPENSEARCH_USERNAME=admin +OPENSEARCH_PASSWORD=admin +``` + +Set this in your `frontend/.env`: + +```bash +VITE_OPENSEARCH_DASHBOARDS_URL=http://localhost:5601 +``` + +### Docker Compose + +The infrastructure stack includes OpenSearch and OpenSearch Dashboards: + +```bash +docker compose -f docker/docker-compose.infra.yml up -d opensearch opensearch-dashboards +``` + +### Index Pattern + +After indexing data, create an index pattern in OpenSearch Dashboards: + +1. Go to **Dashboards Management** > **Index Patterns** +2. Create pattern: `security-findings-*` +3. Select `@timestamp` as the time field +4. Click **Create index pattern** + + + If you don't see `shipsec.*` fields in Available Fields after indexing, refresh the index pattern field list in Dashboards Management. + + +--- + +## Using Analytics Sink + +### Basic Workflow + +1. Add a security scanner to your workflow (Nuclei, TruffleHog, etc.) +2. Add an Analytics Sink component +3. Connect the scanner's `results` port to the Analytics Sink's `data` input +4. Run the workflow + +### Component Parameters + +| Parameter | Description | +|-----------|-------------| +| **Index Suffix** | Custom suffix for the index name. Defaults to workflow slug. | +| **Asset Key Field** | Field to use as asset identifier. Auto-detect checks: asset_key > host > domain > subdomain > url > ip > asset > target | +| **Custom Field Name** | Custom field when Asset Key Field is "custom" | +| **Fail on Error** | When enabled, workflow stops if indexing fails. Default: fire-and-forget. | + +### Fire-and-Forget Mode + +By default, Analytics Sink operates in fire-and-forget mode: +- Indexing errors are logged but don't stop the workflow +- Useful for non-critical analytics that shouldn't block security scans +- Enable "Fail on Error" for strict indexing requirements + +--- + +## View Analytics Button + +The workflow builder includes a "View Analytics" button that opens OpenSearch Dashboards with pre-filtered data: + +- **When a run is selected**: Filters by `shipsec.run_id` +- **When no run is selected**: Filters by `shipsec.workflow_id` +- **Time range**: Last 7 days + +The button only appears when `VITE_OPENSEARCH_DASHBOARDS_URL` is configured. 
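+
+A minimal sketch of how such a link can be built (it mirrors the Discover URL format
+used elsewhere in the analytics docs; the exact frontend helper, and whether
+`workflow_id` needs the `.keyword` suffix, may differ):
+
+```typescript
+// Assumes Vite env typing; `runId` is undefined when no run is selected.
+function buildAnalyticsUrl(workflowId: string, runId?: string): string {
+  const base = import.meta.env.VITE_OPENSEARCH_DASHBOARDS_URL; // e.g. '/analytics'
+  const filter = runId
+    ? `shipsec.run_id.keyword:"${runId}"`
+    : `shipsec.workflow_id.keyword:"${workflowId}"`;
+  const g = encodeURIComponent('(time:(from:now-7d,to:now))');
+  const a = encodeURIComponent(
+    `(columns:!(_source),index:'security-findings-*',interval:auto,query:(language:kuery,query:'${filter}'),sort:!('@timestamp',desc))`,
+  );
+  return `${base}/app/discover#/?_g=${g}&_a=${a}`;
+}
+```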
+ +--- + +## Index Naming + +Indexes follow the pattern: `security-findings-{orgId}-{suffix}` + +| Component | Value | +|-----------|-------| +| `orgId` | Organization ID from workflow context | +| `suffix` | Custom suffix parameter, or date (`YYYY.MM.DD`) | + +Example: `security-findings-org_abc123-2025.01.21` + +--- + +## Building Dashboards + +### Recommended Visualizations + +| Visualization | Description | +|---------------|-------------| +| **Findings Over Time** | Line chart with `@timestamp` on X-axis, count on Y-axis | +| **Severity Distribution** | Pie chart with `terms` on `severity` | +| **Top Vulnerable Assets** | Bar chart with `terms` on `asset_key` | +| **Findings by Scanner** | Bar chart with `terms` on `scanner` | +| **New vs Recurring** | Use `finding_hash` cardinality vs total count | + +### Alert Examples + +| Alert | Query | +|-------|-------| +| Critical finding detected | `severity: "CRITICAL"` | +| New secrets exposed | `scanner: "trufflehog"` | +| RLS disabled | `check_id: "DB_RLS_DISABLED"` | + +--- + +## Troubleshooting + +### Data not appearing in OpenSearch + +1. Check worker logs for `[OpenSearchIndexer]` messages +2. Verify `OPENSEARCH_URL` is set in worker environment +3. Ensure Analytics Sink is connected to a `results` port +4. Check if OpenSearch is running: `curl http://localhost:9200/_cluster/health` + +### Field mapping errors + +If you see "Limit of total fields [1000] has been exceeded": +1. Delete the problematic index: `curl -X DELETE "http://localhost:9200/security-findings-*"` +2. Re-run the workflow (new index will use correct schema) + +### shipsec fields not visible + +1. Fields starting with `_` are hidden in OpenSearch UI +2. Ensure you're using `shipsec.*` (no underscore prefix) +3. Refresh the index pattern in Dashboards Management + +### pm2 not loading environment variables + +pm2's `env_file` doesn't auto-inject variables. The worker uses a custom `loadWorkerEnv()` function in `pm2.config.cjs`. After changing `worker/.env`: + +```bash +pm2 delete shipsec-worker +pm2 start pm2.config.cjs --only shipsec-worker +``` + +--- + +## Best Practices + +### Do + +- Connect `results` ports (not `rawOutput`) to Analytics Sink +- Use meaningful index suffixes for organization +- Monitor index size and implement retention policies +- Create saved searches for common queries + +### Don't + +- Don't connect deeply nested JSON (causes field explosion) +- Don't rely on analytics for critical workflow logic +- Don't store PII or secrets in indexed findings + +--- + +## Component Author Guidelines + +If you're building a security scanner component, see [Analytics Output Port](/development/component-development#analytics-output-port-results) for implementation details on adding the `results` output port. 
+ +--- + +## Related + +- [Component Development](/development/component-development) - Building scanner components +- [Core Components](/components/core) - Analytics Sink reference +- [Analytics (PostHog)](/development/analytics) - Product analytics (different system) diff --git a/docs/docs.json b/docs/docs.json index d5d0f744..046e4014 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -42,6 +42,7 @@ "pages": [ "development/component-development", "development/isolated-volumes", + "development/workflow-analytics", "development/analytics", "development/release-process" ] diff --git a/docs/installation.mdx b/docs/installation.mdx index c99d868f..2e56a34d 100644 --- a/docs/installation.mdx +++ b/docs/installation.mdx @@ -74,6 +74,68 @@ The `just dev` command automatically: --- +## Analytics Stack (Optional) + +ShipSec Studio includes an optional analytics stack powered by OpenSearch for indexing and visualizing workflow execution data. + +### Starting the Analytics Stack + +The analytics services are included in the infrastructure docker-compose file: + +```bash +# Start infrastructure including OpenSearch +just infra up +``` + +This will start: +- **OpenSearch** on port `9200` - Search and analytics engine +- **OpenSearch Dashboards** on port `5601` - Visualization and query UI + +### Configuring Analytics + +Add these environment variables to your backend and worker `.env` files: + +```bash +# Backend (.env) +OPENSEARCH_URL=http://localhost:9200 +OPENSEARCH_USERNAME=admin +OPENSEARCH_PASSWORD=admin +OPENSEARCH_DASHBOARDS_URL=http://localhost:5601 + +# Frontend (.env) +VITE_OPENSEARCH_DASHBOARDS_URL=http://localhost:5601 +``` + +### Setting Up the Index Template + +After starting OpenSearch, create the security findings index template: + +```bash +cd backend +bun run setup:opensearch +``` + +This creates the `security-findings-*` index template with proper mappings for workflow execution data. + +### Using Analytics + +1. **Analytics Sink Component**: Add the "Analytics Sink" component to your workflows to index output data +2. **Dashboards Link**: Access OpenSearch Dashboards from the Studio sidebar +3. **Query API**: Use the `/api/analytics/query` endpoint to query indexed data programmatically + +### Analytics Service Endpoints + +| Service | URL | Notes | +|---------|-----|-------| +| OpenSearch | http://localhost:9200 | Search engine API | +| OpenSearch Dashboards | http://localhost:5601 | Visualization UI | + + + The analytics stack is optional. If OpenSearch is not configured, the Analytics Sink component will gracefully skip indexing and log a warning. 
+ + +--- + ## Production Deployment For production, use the Docker-based deployment: diff --git a/frontend/.env.example b/frontend/.env.example index 7bc89a98..8089b93e 100644 --- a/frontend/.env.example +++ b/frontend/.env.example @@ -1,5 +1,5 @@ -# API Configuration -VITE_API_URL=http://localhost:3211 +# API Configuration (nginx /api) +VITE_API_URL=http://localhost # Application Configuration VITE_APP_NAME=Security Workflow Builder @@ -21,3 +21,8 @@ VITE_PUBLIC_POSTHOG_HOST= # Logo.dev public key for brand logos VITE_LOGO_DEV_PUBLIC_KEY= + +# OpenSearch Dashboards (Optional - for Analytics features) +# Leave empty to hide Dashboards navigation link +# For dev/prod: http://localhost/analytics (nginx in dev/prod) +VITE_OPENSEARCH_DASHBOARDS_URL=http://localhost/analytics diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 26139b94..b06bf0a1 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -12,6 +12,7 @@ import { WebhookEditorPage } from '@/pages/WebhookEditorPage'; import { SchedulesPage } from '@/pages/SchedulesPage'; import { ActionCenterPage } from '@/pages/ActionCenterPage'; import { RunRedirect } from '@/pages/RunRedirect'; +import { AnalyticsSettingsPage } from '@/pages/AnalyticsSettingsPage'; import { ToastProvider } from '@/components/ui/toast-provider'; import { AppLayout } from '@/components/layout/AppLayout'; import { AuthProvider } from '@/auth/auth-context'; @@ -84,6 +85,7 @@ function App() { } /> } /> } /> + } /> } /> } /> { console.warn('Local auth: signUp not implemented'); }, - signOut: () => { + signOut: async () => { + // Clear session cookie via backend logout endpoint + // Use relative path to ensure we hit the same origin as login + try { + await fetch('/api/v1/auth/logout', { + method: 'POST', + credentials: 'include', + }); + } catch (error) { + console.warn('Failed to clear session cookie:', error); + } // Clear admin credentials from store useAuthStore.getState().clear(); }, diff --git a/frontend/src/components/auth/AdminLoginForm.tsx b/frontend/src/components/auth/AdminLoginForm.tsx index 3840dff6..094e3311 100644 --- a/frontend/src/components/auth/AdminLoginForm.tsx +++ b/frontend/src/components/auth/AdminLoginForm.tsx @@ -3,9 +3,8 @@ import { Button } from '@/components/ui/button'; import { Input } from '@/components/ui/input'; import { Label } from '@/components/ui/label'; import { useAuthStore } from '@/store/authStore'; -import { useNavigate } from 'react-router-dom'; +import { useNavigate, useSearchParams } from 'react-router-dom'; import { LogIn } from 'lucide-react'; -import { API_V1_URL } from '@/services/api'; export function AdminLoginForm() { const [username, setUsername] = useState(''); @@ -14,6 +13,8 @@ export function AdminLoginForm() { const [isLoading, setIsLoading] = useState(false); const setAdminCredentials = useAuthStore((state) => state.setAdminCredentials); const navigate = useNavigate(); + const [searchParams] = useSearchParams(); + const returnTo = searchParams.get('returnTo'); const handleSubmit = async (e: React.FormEvent) => { e.preventDefault(); @@ -31,28 +32,42 @@ export function AdminLoginForm() { setIsLoading(true); try { - // Test the credentials by making a simple API call - // If it fails, the credentials are invalid + // Validate credentials and set session cookie via /auth/login endpoint + // This sets an httpOnly cookie for browser navigation to protected routes (e.g., /analytics/) + // Use relative path to ensure cookie is set via nginx (same origin as /analytics/* routes) const credentials = 
btoa(`${trimmedUsername}:${trimmedPassword}`); - const response = await fetch(`${API_V1_URL}/workflows`, { + const loginResponse = await fetch('/api/v1/auth/login', { + method: 'POST', headers: { Authorization: `Basic ${credentials}`, 'Content-Type': 'application/json', }, + credentials: 'include', // Important: include cookies in the response }); - if (!response.ok) { - if (response.status === 401) { + if (!loginResponse.ok) { + if (loginResponse.status === 401) { throw new Error('Invalid username or password'); } - throw new Error(`Authentication failed: ${response.status} ${response.statusText}`); + throw new Error( + `Authentication failed: ${loginResponse.status} ${loginResponse.statusText}`, + ); } - // Store credentials only after verification succeeds + // Store credentials for API requests (Basic auth header) setAdminCredentials(trimmedUsername, trimmedPassword); - // Success - navigate to home - navigate('/'); + // Success - redirect to returnTo URL or home + if (returnTo) { + // For paths like /analytics/*, use full page navigation since they're served by nginx + if (returnTo.startsWith('/analytics')) { + window.location.href = returnTo; + } else { + navigate(returnTo); + } + } else { + navigate('/'); + } } catch (err) { // Clear credentials on error useAuthStore.getState().clear(); diff --git a/frontend/src/components/layout/AppLayout.tsx b/frontend/src/components/layout/AppLayout.tsx index bd0bf44f..d12b92b8 100644 --- a/frontend/src/components/layout/AppLayout.tsx +++ b/frontend/src/components/layout/AppLayout.tsx @@ -23,6 +23,8 @@ import { Command, Zap, Webhook, + BarChart3, + Settings, } from 'lucide-react'; import React, { useState, useEffect, useCallback } from 'react'; import { useAuthStore } from '@/store/authStore'; @@ -299,6 +301,21 @@ export function AppLayout({ children }: AppLayoutProps) { href: '/artifacts', icon: Archive, }, + { + name: 'Analytics Settings', + href: '/analytics-settings', + icon: Settings, + }, + ...(env.VITE_OPENSEARCH_DASHBOARDS_URL + ? [ + { + name: 'Dashboards', + href: env.VITE_OPENSEARCH_DASHBOARDS_URL, + icon: BarChart3, + external: true, + }, + ] + : []), ]; const isActive = (path: string) => { @@ -399,6 +416,50 @@ export function AppLayout({ children }: AppLayoutProps) { {navigationItems.map((item) => { const Icon = item.icon; const active = isActive(item.href); + const isExternal = 'external' in item && item.external; + const openInNewTab = isExternal && 'newTab' in item ? item.newTab !== false : true; + + // Render external link + if (isExternal) { + return ( + { + // Close sidebar on mobile after clicking + if (isMobile) { + setSidebarOpen(false); + } + }} + > + + + + {item.name} + + + + ); + } + + // Render internal link (React Router) return ( )} + {env.VITE_OPENSEARCH_DASHBOARDS_URL && workflowId && ( + + )} + + + + + + + {/* Storage Usage Card */} +
+          <Card>
+            <CardHeader>
+              <CardTitle>Storage Usage</CardTitle>
+              <CardDescription>
+                Current storage consumption for analytics data
+              </CardDescription>
+            </CardHeader>
+            <CardContent>
+              {storageUsage ? (
+                <div>
+                  <div>
+                    <span>Used</span>
+                    <span>{storageUsage.used}</span>
+                  </div>
+                  <div>
+                    <span>Total Available</span>
+                    <span>{storageUsage.total}</span>
+                  </div>
+                  {/* Progress bar could be added here */}
+                </div>
+              ) : (
+                <div>
+                  Storage usage information will be available once analytics data is collected.
+                </div>
+              )}
+            </CardContent>
+          </Card>
+
+          {/* Info Box */}
+          <Card>
+            <CardHeader>
+              <CardTitle>About Analytics Data</CardTitle>
+            </CardHeader>
+            <CardContent>
+              <ul>
+                <li>• Analytics data is indexed from workflow executions using the Analytics Sink component</li>
+                <li>• Data includes security findings, scan results, and other workflow outputs</li>
+                <li>• You can query this data via the API or view it in OpenSearch Dashboards</li>
+                <li>• Retention settings apply organization-wide and cannot exceed your tier limit</li>
+              </ul>
+            </CardContent>
+          </Card>
+ + + ); +} diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index e8818ab3..00d75468 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -23,9 +23,21 @@ export default defineConfig({ host: '0.0.0.0', port: 5173, open: false, - allowedHosts: ['studio.shipsec.ai'], + allowedHosts: ['studio.shipsec.ai', 'frontend'], + proxy: { + '/api': { + target: 'http://localhost:3211', + changeOrigin: true, + secure: false, + }, + '/analytics': { + target: 'http://localhost:5601', + changeOrigin: true, + secure: false, + }, + }, }, preview: { - allowedHosts: ['studio.shipsec.ai'], + allowedHosts: ['studio.shipsec.ai', 'frontend'], }, }) diff --git a/packages/component-sdk/src/analytics.ts b/packages/component-sdk/src/analytics.ts new file mode 100644 index 00000000..2035eb2a --- /dev/null +++ b/packages/component-sdk/src/analytics.ts @@ -0,0 +1,66 @@ +/** + * Analytics helpers for component authors. + * + * These utilities help components output structured findings + * that can be indexed into OpenSearch via the Analytics Sink. + */ + +import { createHash } from 'crypto'; +import { z } from 'zod'; +import { withPortMeta } from './port-meta'; + +// Analytics Results Contract +export const analyticsResultContractName = 'core.analytics.result.v1'; + +export const severitySchema = z.enum(['critical', 'high', 'medium', 'low', 'info', 'none']); + +export const analyticsResultSchema = () => + withPortMeta( + z + .object({ + scanner: z.string().describe('Scanner/component that produced this result'), + finding_hash: z.string().describe('Stable 16-char hash for deduplication'), + severity: severitySchema.describe('Finding severity level, use "none" if not applicable'), + asset_key: z + .string() + .optional() + .describe('Primary asset identifier (auto-detected if missing)'), + }) + .passthrough(), // Allow scanner-specific fields + { schemaName: analyticsResultContractName } + ); + +export type AnalyticsResult = z.infer>; +export type Severity = z.infer; + +/** + * Generate a stable hash for finding deduplication. + * + * The hash is used to track findings across workflow runs: + * - Identify new vs recurring findings + * - Calculate first-seen / last-seen timestamps + * - Deduplicate findings in dashboards + * + * @param fields - Key identifying fields of the finding (e.g., templateId, host, matchedAt) + * @returns 16-character hex string (SHA-256 truncated) + * + * @example + * ```typescript + * // Nuclei scanner + * const hash = generateFindingHash(finding.templateId, finding.host, finding.matchedAt); + * + * // TruffleHog scanner + * const hash = generateFindingHash(secret.DetectorType, secret.Redacted, filePath); + * + * // Supabase scanner + * const hash = generateFindingHash(check.check_id, projectRef, check.resource); + * ``` + */ +export function generateFindingHash( + ...fields: (string | undefined | null)[] +): string { + const normalized = fields + .map((f) => (f ?? 
'').toLowerCase().trim()) + .join('|'); + return createHash('sha256').update(normalized).digest('hex').slice(0, 16); +} diff --git a/packages/component-sdk/src/context.ts b/packages/component-sdk/src/context.ts index cfdd0c0f..538e0b2e 100644 --- a/packages/component-sdk/src/context.ts +++ b/packages/component-sdk/src/context.ts @@ -45,10 +45,13 @@ export interface CreateContextOptions { logCollector?: (entry: LogEventInput) => void; terminalCollector?: (chunk: TerminalChunkInput) => void; agentTracePublisher?: AgentTracePublisher; + workflowId?: string; + workflowName?: string; + organizationId?: string | null; } export function createExecutionContext(options: CreateContextOptions): ExecutionContext { - const { runId, componentRef, metadata: metadataInput, storage, secrets, artifacts, trace, logCollector, terminalCollector, agentTracePublisher } = + const { runId, componentRef, metadata: metadataInput, storage, secrets, artifacts, trace, logCollector, terminalCollector, agentTracePublisher, workflowId, workflowName, organizationId } = options; const metadata = createMetadata(runId, componentRef, metadataInput); const scopedTrace = trace ? createScopedTrace(trace, metadata) : undefined; @@ -145,6 +148,9 @@ export function createExecutionContext(options: CreateContextOptions): Execution terminalCollector, metadata, agentTracePublisher, + workflowId, + workflowName, + organizationId, http: undefined as unknown as ExecutionContext['http'], }; diff --git a/packages/component-sdk/src/index.ts b/packages/component-sdk/src/index.ts index 33671e09..6e3a71f2 100644 --- a/packages/component-sdk/src/index.ts +++ b/packages/component-sdk/src/index.ts @@ -35,3 +35,6 @@ export * from './zod-parameters'; export * from './json-schema'; export * from './schema-validation'; export * from './zod-coerce'; + +// Analytics helpers for component authors +export * from './analytics'; diff --git a/packages/component-sdk/src/types.ts b/packages/component-sdk/src/types.ts index 0cbad769..105c6191 100644 --- a/packages/component-sdk/src/types.ts +++ b/packages/component-sdk/src/types.ts @@ -344,6 +344,11 @@ export interface ExecutionContext { metadata: ExecutionContextMetadata; agentTracePublisher?: AgentTracePublisher; + // Workflow context (optional, available when running in workflow) + workflowId?: string; + workflowName?: string; + organizationId?: string | null; + // Service interfaces - implemented by adapters storage?: IFileStorageService; secrets?: ISecretsService; diff --git a/worker/.env.example b/worker/.env.example index 0e8c26ed..6320b5b6 100644 --- a/worker/.env.example +++ b/worker/.env.example @@ -19,3 +19,9 @@ LOKI_URL=http://localhost:3100 LOKI_TENANT_ID= LOKI_USERNAME= LOKI_PASSWORD= + +# OpenSearch Configuration (Optional - for Analytics Sink component) +# Leave empty to disable analytics indexing +OPENSEARCH_URL=http://localhost:9200 +OPENSEARCH_USERNAME= +OPENSEARCH_PASSWORD= diff --git a/worker/package.json b/worker/package.json index 0657c102..7ffa643b 100644 --- a/worker/package.json +++ b/worker/package.json @@ -25,6 +25,7 @@ "@googleapis/admin": "^29.0.0", "@grpc/grpc-js": "^1.14.0", "@okta/okta-sdk-nodejs": "^7.3.0", + "@opensearch-project/opensearch": "^3.5.1", "@shipsec/component-sdk": "*", "@shipsec/contracts": "*", "@shipsec/shared": "*", diff --git a/worker/src/components/core/analytics-sink.ts b/worker/src/components/core/analytics-sink.ts new file mode 100644 index 00000000..51fd647b --- /dev/null +++ b/worker/src/components/core/analytics-sink.ts @@ -0,0 +1,250 @@ +import { z } 
from 'zod'; +import { + componentRegistry, + defineComponent, + inputs, + outputs, + parameters, + port, + param, + analyticsResultSchema, +} from '@shipsec/component-sdk'; + +const inputSchema = inputs({ + data: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Array of analytics results with required scanner, finding_hash, and severity fields. Each array item is indexed as a separate document. Additional scanner-specific fields are preserved.', + }), +}); + +const outputSchema = outputs({ + indexed: port(z.boolean(), { + label: 'Indexed', + description: 'Indicates whether the data was successfully indexed to OpenSearch.', + }), + documentCount: port(z.number(), { + label: 'Document Count', + description: 'Number of documents indexed (1 for objects, array length for arrays).', + }), + indexName: port(z.string(), { + label: 'Index Name', + description: 'Name of the OpenSearch index where data was stored.', + }), +}); + +const parameterSchema = parameters({ + indexSuffix: param( + z + .string() + .optional() + .describe( + 'Optional suffix to append to the index name. Defaults to workflow slug if not provided.', + ), + { + label: 'Index Suffix', + editor: 'text', + placeholder: 'workflow-slug (default)', + description: + 'Custom suffix for the index name (e.g., "subdomain-enum"). Defaults to workflow slug if not provided.', + }, + ), + assetKeyField: param( + z + .enum([ + 'auto', + 'asset_key', + 'host', + 'domain', + 'subdomain', + 'url', + 'ip', + 'asset', + 'target', + 'custom', + ]) + .default('auto') + .describe( + 'Field name to use as the asset_key. Auto-detect checks common fields (asset_key, host, domain, subdomain, url, ip, asset, target) in priority order.', + ), + { + label: 'Asset Key Field', + editor: 'select', + options: [ + { label: 'Auto-detect', value: 'auto' }, + { label: 'asset_key', value: 'asset_key' }, + { label: 'host', value: 'host' }, + { label: 'domain', value: 'domain' }, + { label: 'subdomain', value: 'subdomain' }, + { label: 'url', value: 'url' }, + { label: 'ip', value: 'ip' }, + { label: 'asset', value: 'asset' }, + { label: 'target', value: 'target' }, + { label: 'Custom field name', value: 'custom' }, + ], + description: + 'Specify which field to use as the asset identifier. Auto-detect uses priority: asset_key > host > domain > subdomain > url > ip > asset > target.', + }, + ), + customAssetKeyField: param( + z + .string() + .optional() + .describe('Custom field name to use as asset_key when assetKeyField is set to "custom".'), + { + label: 'Custom Field Name', + editor: 'text', + placeholder: 'e.g., hostname, endpoint, etc.', + description: 'Enter the custom field name to use as the asset identifier.', + visibleWhen: { assetKeyField: 'custom' }, + }, + ), + failOnError: param( + z + .boolean() + .default(false) + .describe( + 'Whether to fail the workflow if indexing fails. Default is false (fire-and-forget).', + ), + { + label: 'Fail workflow if indexing fails', + editor: 'boolean', + description: + "When enabled, the workflow will stop if indexing to OpenSearch fails. By default, indexing errors are logged but don't stop the workflow.", + }, + ), +}); + +const definition = defineComponent({ + id: 'core.analytics.sink', + label: 'Analytics Sink', + category: 'output', + runner: { kind: 'inline' }, + inputs: inputSchema, + outputs: outputSchema, + parameters: parameterSchema, + docs: 'Indexes structured analytics results into OpenSearch for dashboards, queries, and alerts. 
Requires results to follow the `core.analytics.result.v1` contract with scanner, finding_hash, and severity fields. Connect the `results` port from scanner components. Each array item becomes a separate document with workflow context stored under `shipsec.*`. Indexing is fire-and-forget by default.', + ui: { + slug: 'analytics-sink', + version: '1.0.0', + type: 'output', + category: 'output', + description: + 'Index security findings and workflow outputs into OpenSearch for analytics, dashboards, and alerting.', + icon: 'BarChart3', + author: { + name: 'ShipSecAI', + type: 'shipsecai', + }, + isLatest: true, + deprecated: false, + examples: [ + 'Index subdomain enumeration results for tracking asset discovery over time.', + 'Store vulnerability scan findings for correlation and trend analysis.', + 'Aggregate security metrics across multiple workflows into unified dashboards.', + ], + }, + async execute({ inputs, params }, context) { + const { getOpenSearchIndexer } = await import('../../utils/opensearch-indexer'); + const indexer = getOpenSearchIndexer(); + + const documentCount = inputs.data.length; + + // Check if indexing is enabled + if (!indexer.isEnabled()) { + context.logger.debug( + '[Analytics Sink] OpenSearch not configured, skipping indexing (fire-and-forget)', + ); + return { + indexed: false, + documentCount, + indexName: '', + }; + } + + // Validate required workflow context + if (!context.workflowId || !context.workflowName || !context.organizationId) { + const error = new Error( + 'Analytics Sink requires workflow context (workflowId, workflowName, organizationId)', + ); + context.logger.error(`[Analytics Sink] ${error.message}`); + if (params.failOnError) { + throw error; + } + return { + indexed: false, + documentCount: 0, + indexName: '', + }; + } + + // Runtime validation of analytics result contract + const validated = z.array(analyticsResultSchema()).safeParse(inputs.data); + if (!validated.success) { + const errorMessage = `Invalid analytics results format: ${validated.error.message}`; + context.logger.error(`[Analytics Sink] ${errorMessage}`); + if (params.failOnError) { + throw new Error(errorMessage); + } + return { + indexed: false, + documentCount, + indexName: '', + }; + } + + try { + // Determine the actual asset key field to use + let assetKeyField: string | undefined; + if (params.assetKeyField === 'auto') { + // Auto-detect mode: let the indexer determine the asset key field + assetKeyField = undefined; + } else if (params.assetKeyField === 'custom') { + // Custom mode: use the custom field name if provided + assetKeyField = params.customAssetKeyField; + } else { + // Specific field selected + assetKeyField = params.assetKeyField; + } + + const indexOptions = { + workflowId: context.workflowId, + workflowName: context.workflowName, + runId: context.runId, + nodeRef: context.componentRef, + componentId: 'core.analytics.sink', + assetKeyField, + indexSuffix: params.indexSuffix, + trace: context.trace, + }; + + context.logger.info(`[Analytics Sink] Bulk indexing ${documentCount} documents`); + const result = await indexer.bulkIndex(context.organizationId, validated.data, indexOptions); + + context.logger.info( + `[Analytics Sink] Successfully indexed ${result.documentCount} document(s) to ${result.indexName}`, + ); + return { + indexed: true, + documentCount: result.documentCount, + indexName: result.indexName, + }; + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : 'Unknown error during indexing'; + context.logger.error(`[Analytics Sink] Indexing failed: ${errorMessage}`); + + if (params.failOnError) { + throw error; + } + + // Fire-and-forget mode: log error but don't fail workflow + return { + indexed: false, + documentCount, + indexName: '', + }; + } + }, +}); + +componentRegistry.register(definition); diff --git a/worker/src/components/index.ts b/worker/src/components/index.ts index 00139859..a2a3674f 100644 --- a/worker/src/components/index.ts +++ b/worker/src/components/index.ts @@ -27,6 +27,7 @@ import './core/destination-artifact'; import './core/destination-s3'; import './core/text-block'; import './core/workflow-call'; +import './core/analytics-sink'; // Manual Action components import './manual-action/manual-approval'; import './manual-action/manual-selection'; diff --git a/worker/src/components/security/__tests__/nuclei.test.ts b/worker/src/components/security/__tests__/nuclei.test.ts index 9bd6ee20..4208cb42 100644 --- a/worker/src/components/security/__tests__/nuclei.test.ts +++ b/worker/src/components/security/__tests__/nuclei.test.ts @@ -147,6 +147,7 @@ describe('Nuclei Component', () => { timestamp: '2024-12-04T10:00:00Z', }, ], + results: [], rawOutput: '{"template-id":"CVE-2024-1234"}', targetCount: 1, findingCount: 1, @@ -174,6 +175,7 @@ describe('Nuclei Component', () => { timestamp: '2024-12-04T10:00:00Z', }, ], + results: [], rawOutput: '', targetCount: 1, findingCount: 1, @@ -200,6 +202,7 @@ describe('Nuclei Component', () => { ip: '1.2.3.4', }, ], + results: [], rawOutput: '', targetCount: 1, findingCount: 1, diff --git a/worker/src/components/security/__tests__/trufflehog.test.ts b/worker/src/components/security/__tests__/trufflehog.test.ts index d3c4e13a..e9bfff11 100644 --- a/worker/src/components/security/__tests__/trufflehog.test.ts +++ b/worker/src/components/security/__tests__/trufflehog.test.ts @@ -97,6 +97,18 @@ describe('trufflehog component', () => { secretCount: 1, verifiedCount: 1, hasVerifiedSecrets: true, + results: [ + { + DetectorType: 'AWS', + DetectorName: 'AWS', + Verified: true, + Raw: 'AKIAIOSFODNN7EXAMPLE', + scanner: 'trufflehog', + severity: 'high', + finding_hash: 'abc123def456abcd', + asset_key: 'https://github.com/test/repo', + }, + ], }; vi.spyOn(sdk, 'runComponentWithRunner').mockResolvedValue(JSON.stringify(mockOutput)); @@ -107,6 +119,9 @@ describe('trufflehog component', () => { expect(result.verifiedCount).toBe(1); expect(result.hasVerifiedSecrets).toBe(true); expect(result.secrets).toHaveLength(1); + expect(result.results).toHaveLength(1); + expect(result.results[0].scanner).toBe('trufflehog'); + expect(result.results[0].severity).toBe('high'); }); it('should handle no secrets found', async () => { @@ -135,6 +150,7 @@ describe('trufflehog component', () => { secretCount: 0, verifiedCount: 0, hasVerifiedSecrets: false, + results: [], }; vi.spyOn(sdk, 'runComponentWithRunner').mockResolvedValue(JSON.stringify(mockOutput)); @@ -145,6 +161,7 @@ describe('trufflehog component', () => { expect(result.verifiedCount).toBe(0); expect(result.hasVerifiedSecrets).toBe(false); expect(result.secrets).toHaveLength(0); + expect(result.results).toHaveLength(0); }); it('should support different scan types', () => { @@ -234,6 +251,26 @@ describe('trufflehog component', () => { secretCount: 2, verifiedCount: 1, hasVerifiedSecrets: true, + results: [ + { + DetectorType: 'Generic', + Verified: false, + Raw: 'potential_secret_123', + scanner: 'trufflehog', + severity: 'high', + finding_hash: 
'def456abc789def0', + asset_key: 'https://github.com/test/repo', + }, + { + DetectorType: 'AWS', + Verified: true, + Raw: 'AKIAIOSFODNN7EXAMPLE', + scanner: 'trufflehog', + severity: 'high', + finding_hash: 'abc123def456abcd', + asset_key: 'https://github.com/test/repo', + }, + ], }; vi.spyOn(sdk, 'runComponentWithRunner').mockResolvedValue(JSON.stringify(mockOutput)); @@ -243,6 +280,7 @@ describe('trufflehog component', () => { expect(result.secretCount).toBe(2); expect(result.verifiedCount).toBe(1); expect(result.hasVerifiedSecrets).toBe(true); + expect(result.results).toHaveLength(2); }); it('should handle parse errors gracefully', async () => { diff --git a/worker/src/components/security/nuclei.ts b/worker/src/components/security/nuclei.ts index 67f20a3e..eb268720 100644 --- a/worker/src/components/security/nuclei.ts +++ b/worker/src/components/security/nuclei.ts @@ -12,6 +12,9 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; import * as yaml from 'js-yaml'; @@ -185,6 +188,11 @@ const outputSchema = outputs({ description: 'Array of detected vulnerabilities with severity, tags, and matched URLs.', connectionType: { kind: 'list', element: { kind: 'primitive', name: 'json' } }, }), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. Connect to Analytics Sink.', + }), rawOutput: port(z.string(), { label: 'Raw Output', description: 'Complete JSONL output from nuclei for downstream processing.', @@ -515,8 +523,17 @@ const definition = defineComponent({ `[Nuclei] Scan complete: ${findings.length} finding(s) from ${parsedInputs.targets.length} target(s)`, ); + // Build analytics-ready results with scanner metadata (follows core.analytics.result.v1 contract) + const results: AnalyticsResult[] = findings.map((finding) => ({ + ...finding, + scanner: 'nuclei', + asset_key: finding.host ?? finding.matchedAt, + finding_hash: generateFindingHash(finding.templateId, finding.host, finding.matchedAt), + })); + const output = { findings, + results, rawOutput: stdout, targetCount: parsedInputs.targets.length, findingCount: findings.length, diff --git a/worker/src/components/security/supabase-scanner.ts b/worker/src/components/security/supabase-scanner.ts index 4bd0d167..5b1d3c96 100644 --- a/worker/src/components/security/supabase-scanner.ts +++ b/worker/src/components/security/supabase-scanner.ts @@ -10,8 +10,11 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, + type DockerRunnerConfig, } from '@shipsec/component-sdk'; -import type { DockerRunnerConfig } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; // Extract Supabase project ref from a standard URL like https://.supabase.co @@ -150,6 +153,11 @@ const outputSchema = outputs({ reason: 'Scanner issue payloads can vary by Supabase project configuration.', connectionType: { kind: 'list', element: { kind: 'primitive', name: 'json' } }, }), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. 
Connect to Analytics Sink.', + }), report: port(z.unknown(), { label: 'Scanner Report', description: 'Full JSON report produced by the scanner.', @@ -329,6 +337,19 @@ const definition = defineComponent({ } catch (err) { const msg = (err as Error)?.message ?? 'Unknown error'; context.logger.error(`[SupabaseScanner] Scanner failed: ${msg}`); + + // Check if this is a fatal Docker error (image pull failure, container start failure) + // These should fail hard, not gracefully degrade + if ( + msg.includes('exit code 125') || + msg.includes('Unable to find image') || + msg.includes('permission denied') || + msg.includes('authentication required') + ) { + throw err; + } + + // For other errors (scanner runtime errors), allow graceful degradation errors.push(msg); } @@ -357,6 +378,22 @@ const definition = defineComponent({ } catch (err) { const msg = (err as Error)?.message ?? 'Unknown error'; context.logger.error(`[SupabaseScanner] Scanner failed: ${msg}`); + + // Check if this is a fatal Docker error that should fail the workflow + if ( + msg.includes('exit code 125') || + msg.includes('Unable to find image') || + msg.includes('permission denied') || + msg.includes('authentication required') + ) { + // Cleanup volume before throwing + if (volumeInitialized) { + await volume.cleanup(); + context.logger.info('[SupabaseScanner] Cleaned up isolated volume'); + } + throw err; + } + errors.push(msg); } finally { if (volumeInitialized) { @@ -365,11 +402,34 @@ const definition = defineComponent({ } } + // Build analytics-ready results with scanner metadata (follows core.analytics.result.v1 contract) + const results: AnalyticsResult[] = (issues ?? []).map((issue) => { + const issueObj = typeof issue === 'object' && issue !== null ? issue : { raw: issue }; + const issueRecord = issueObj as Record; + // Extract check_id and resource for deduplication hash + const checkId = issueRecord.check_id as string | undefined; + const resource = issueRecord.resource as string | undefined; + // Map severity from scanner output or default to 'medium' for security issues + const rawSeverity = (issueRecord.severity as string | undefined)?.toLowerCase(); + const validSeverities = ['critical', 'high', 'medium', 'low', 'info', 'none'] as const; + const severity = validSeverities.includes(rawSeverity as (typeof validSeverities)[number]) + ? (rawSeverity as (typeof validSeverities)[number]) + : 'medium'; + return { + ...issueObj, + scanner: 'supabase-scanner', + severity, + asset_key: projectRef ?? undefined, + finding_hash: generateFindingHash(checkId, projectRef, resource), + }; + }); + const output: Output = { projectRef: projectRef ?? null, score, summary, issues, + results, report, rawOutput: stdoutCombined ?? '', errors: errors.length > 0 ? errors : undefined, diff --git a/worker/src/components/security/trufflehog.ts b/worker/src/components/security/trufflehog.ts index 731f0b93..76d9b728 100644 --- a/worker/src/components/security/trufflehog.ts +++ b/worker/src/components/security/trufflehog.ts @@ -12,6 +12,9 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; @@ -186,6 +189,11 @@ const outputSchema = outputs({ label: 'Has Verified Secrets', description: 'True when any verified secrets are detected.', }), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. 
Connect to Analytics Sink.', + }), }); // Helper function to build TruffleHog command arguments @@ -256,6 +264,7 @@ function parseRawOutput(rawOutput: string): Output { secretCount: 0, verifiedCount: 0, hasVerifiedSecrets: false, + results: [], }; } @@ -294,6 +303,7 @@ function parseRawOutput(rawOutput: string): Output { secretCount: secrets.length, verifiedCount, hasVerifiedSecrets: verifiedCount > 0, + results: [], // Populated in execute() with scanner metadata }; } @@ -489,7 +499,23 @@ const definition = defineComponent({ }); } - return output; + // Build analytics-ready results with scanner metadata (follows core.analytics.result.v1 contract) + const results: AnalyticsResult[] = output.secrets.map((secret: Secret) => { + // Extract file path from source metadata for hashing + const filePath = + secret.SourceMetadata?.Data?.Git?.file ?? + secret.SourceMetadata?.Data?.Filesystem?.file ?? + ''; + return { + ...secret, + scanner: 'trufflehog', + severity: 'high' as const, // Secrets are always high severity + asset_key: runnerPayload.scanTarget, + finding_hash: generateFindingHash(secret.DetectorType, secret.Redacted, filePath), + }; + }); + + return { ...output, results }; } finally { // Always cleanup volume if it was created if (volume) { diff --git a/worker/src/temporal/__tests__/optional-input-handling.test.ts b/worker/src/temporal/__tests__/optional-input-handling.test.ts new file mode 100644 index 00000000..98d20a65 --- /dev/null +++ b/worker/src/temporal/__tests__/optional-input-handling.test.ts @@ -0,0 +1,279 @@ +/** + * Tests for optional input handling in component execution + * + * Validates that components with optional inputs (required: false or connectionType.kind === 'any') + * can proceed when upstream components return undefined values, instead of failing with + * a ValidationError. + * + * This tests the fix for workflows getting stuck in infinite retry loops when an upstream + * component fails gracefully and returns undefined for some outputs. + */ + +import { describe, expect, it, beforeAll } from 'bun:test'; +import { z } from 'zod'; +import { + componentRegistry, + defineComponent, + inputs, + outputs, + port, + createExecutionContext, + extractPorts, + type ComponentPortMetadata, +} from '@shipsec/component-sdk'; + +describe('Optional Input Handling', () => { + beforeAll(() => { + // Register test component with optional input (required: false) + if (!componentRegistry.has('test.optional.required-false')) { + const component = defineComponent({ + id: 'test.optional.required-false', + label: 'Optional Input (required: false)', + category: 'transform', + runner: { kind: 'inline' }, + inputs: inputs({ + requiredInput: port(z.string(), { + label: 'Required Input', + description: 'This input is required', + }), + optionalInput: port(z.string().optional(), { + label: 'Optional Input', + description: 'This input is optional', + }), + }), + outputs: outputs({ + result: port(z.string(), { label: 'Result' }), + }), + async execute({ inputs }) { + return { + result: `required: ${inputs.requiredInput}, optional: ${inputs.optionalInput ?? 
'undefined'}`, + }; + }, + }); + componentRegistry.register(component); + } + + // Register test component with allowAny input (connectionType.kind === 'any') + if (!componentRegistry.has('test.optional.allow-any')) { + const component = defineComponent({ + id: 'test.optional.allow-any', + label: 'Optional Input (allowAny)', + category: 'transform', + runner: { kind: 'inline' }, + inputs: inputs({ + requiredInput: port(z.string(), { + label: 'Required Input', + description: 'This input is required', + }), + anyInput: port(z.any(), { + label: 'Any Input', + description: 'This input accepts any type including undefined', + allowAny: true, + reason: 'Accepts arbitrary data for testing', + connectionType: { kind: 'any' }, + }), + }), + outputs: outputs({ + result: port(z.string(), { label: 'Result' }), + }), + async execute({ inputs }) { + return { + result: `required: ${inputs.requiredInput}, any: ${inputs.anyInput ?? 'undefined'}`, + }; + }, + }); + componentRegistry.register(component); + } + + // Register test component with all required inputs + if (!componentRegistry.has('test.all-required')) { + const component = defineComponent({ + id: 'test.all-required', + label: 'All Required Inputs', + category: 'transform', + runner: { kind: 'inline' }, + inputs: inputs({ + input1: port(z.string(), { + label: 'Input 1', + description: 'Required input 1', + }), + input2: port(z.string(), { + label: 'Input 2', + description: 'Required input 2', + }), + }), + outputs: outputs({ + result: port(z.string(), { label: 'Result' }), + }), + async execute({ inputs }) { + return { result: `${inputs.input1} + ${inputs.input2}` }; + }, + }); + componentRegistry.register(component); + } + }); + + describe('extractPorts identifies optional inputs correctly', () => { + it('identifies required: false as optional', () => { + const component = componentRegistry.get('test.optional.required-false'); + expect(component).toBeDefined(); + + const ports = extractPorts(component!.inputs); + const optionalPort = ports.find((p: ComponentPortMetadata) => p.id === 'optionalInput'); + + expect(optionalPort).toBeDefined(); + expect(optionalPort!.required).toBe(false); + }); + + it('identifies connectionType.kind === "any" as optional', () => { + const component = componentRegistry.get('test.optional.allow-any'); + expect(component).toBeDefined(); + + const ports = extractPorts(component!.inputs); + const anyPort = ports.find((p: ComponentPortMetadata) => p.id === 'anyInput'); + + expect(anyPort).toBeDefined(); + expect(anyPort!.connectionType?.kind).toBe('any'); + }); + + it('identifies regular inputs as required', () => { + const component = componentRegistry.get('test.all-required'); + expect(component).toBeDefined(); + + const ports = extractPorts(component!.inputs); + + for (const port of ports) { + // Required is either undefined (defaults to true) or explicitly true + expect(port.required).not.toBe(false); + expect(port.connectionType?.kind).not.toBe('any'); + } + }); + }); + + describe('filterRequiredMissingInputs logic', () => { + /** + * This test validates the core logic used in run-component.activity.ts + * to filter out optional inputs from the missing inputs list. 
+ */ + it('filters out optional inputs from missing list', () => { + const component = componentRegistry.get('test.optional.required-false'); + expect(component).toBeDefined(); + + const inputPorts = extractPorts(component!.inputs); + + // Simulate warnings for both inputs being undefined + const warningsToReport = [ + { target: 'requiredInput', sourceRef: 'upstream', sourceHandle: 'output' }, + { target: 'optionalInput', sourceRef: 'upstream', sourceHandle: 'output' }, + ]; + + // Apply the filtering logic from run-component.activity.ts + const requiredMissingInputs = warningsToReport.filter((warning) => { + const portMeta = inputPorts.find((p: ComponentPortMetadata) => p.id === warning.target); + if (!portMeta) return true; + if (portMeta.required === false) return false; + if (portMeta.connectionType?.kind === 'any') return false; + return true; + }); + + // Only requiredInput should be in the filtered list + expect(requiredMissingInputs).toHaveLength(1); + expect(requiredMissingInputs[0].target).toBe('requiredInput'); + }); + + it('filters out allowAny inputs from missing list', () => { + const component = componentRegistry.get('test.optional.allow-any'); + expect(component).toBeDefined(); + + const inputPorts = extractPorts(component!.inputs); + + // Simulate warnings for both inputs being undefined + const warningsToReport = [ + { target: 'requiredInput', sourceRef: 'upstream', sourceHandle: 'output' }, + { target: 'anyInput', sourceRef: 'upstream', sourceHandle: 'output' }, + ]; + + // Apply the filtering logic from run-component.activity.ts + const requiredMissingInputs = warningsToReport.filter((warning) => { + const portMeta = inputPorts.find((p: ComponentPortMetadata) => p.id === warning.target); + if (!portMeta) return true; + if (portMeta.required === false) return false; + if (portMeta.connectionType?.kind === 'any') return false; + return true; + }); + + // Only requiredInput should be in the filtered list + expect(requiredMissingInputs).toHaveLength(1); + expect(requiredMissingInputs[0].target).toBe('requiredInput'); + }); + + it('keeps all required inputs in missing list', () => { + const component = componentRegistry.get('test.all-required'); + expect(component).toBeDefined(); + + const inputPorts = extractPorts(component!.inputs); + + // Simulate warnings for both inputs being undefined + const warningsToReport = [ + { target: 'input1', sourceRef: 'upstream', sourceHandle: 'output' }, + { target: 'input2', sourceRef: 'upstream', sourceHandle: 'output' }, + ]; + + // Apply the filtering logic from run-component.activity.ts + const requiredMissingInputs = warningsToReport.filter((warning) => { + const portMeta = inputPorts.find((p: ComponentPortMetadata) => p.id === warning.target); + if (!portMeta) return true; + if (portMeta.required === false) return false; + if (portMeta.connectionType?.kind === 'any') return false; + return true; + }); + + // Both inputs should be in the filtered list + expect(requiredMissingInputs).toHaveLength(2); + }); + }); + + describe('component execution with optional inputs', () => { + it('executes component with undefined optional input (required: false)', async () => { + const component = componentRegistry.get('test.optional.required-false'); + expect(component).toBeDefined(); + + const context = createExecutionContext({ + runId: 'test-run', + componentRef: 'test-node', + }); + + // Execute with only the required input + const result = await component!.execute!( + { + inputs: { requiredInput: 'hello', optionalInput: undefined }, + params: 
{}, + }, + context, + ); + + expect(result).toEqual({ result: 'required: hello, optional: undefined' }); + }); + + it('executes component with undefined allowAny input', async () => { + const component = componentRegistry.get('test.optional.allow-any'); + expect(component).toBeDefined(); + + const context = createExecutionContext({ + runId: 'test-run', + componentRef: 'test-node', + }); + + // Execute with only the required input + const result = await component!.execute!( + { + inputs: { requiredInput: 'hello', anyInput: undefined }, + params: {}, + }, + context, + ); + + expect(result).toEqual({ result: 'required: hello, any: undefined' }); + }); + }); +}); diff --git a/worker/src/temporal/activities/run-component.activity.ts b/worker/src/temporal/activities/run-component.activity.ts index e7380851..1c42231b 100644 --- a/worker/src/temporal/activities/run-component.activity.ts +++ b/worker/src/temporal/activities/run-component.activity.ts @@ -138,6 +138,9 @@ export async function runComponentActivity( const context = createExecutionContext({ runId: input.runId, componentRef: action.ref, + workflowId: input.workflowId, + workflowName: input.workflowName, + organizationId: input.organizationId ?? null, metadata: { activityId: activityInfo.activityId, attempt: activityInfo.attempt, @@ -365,21 +368,53 @@ export async function runComponentActivity( await resolveSecretParams(resolvedParams, input.rawParams ?? {}); + // Get input port metadata to check which inputs are truly required + let inputsSchemaForValidation = component.inputs; + if (typeof component.resolvePorts === 'function') { + try { + const resolved = component.resolvePorts(resolvedParams); + if (resolved?.inputs) { + inputsSchemaForValidation = resolved.inputs; + } + } catch { + // If port resolution fails, use the base schema + } + } + const inputPorts = inputsSchemaForValidation ? extractPorts(inputsSchemaForValidation) : []; + + // Filter warnings to only those for truly required inputs + // An input is NOT required if: + // - Its schema allows undefined/null (required: false) + // - It accepts any type (connectionType.kind === 'any') which includes undefined + const requiredMissingInputs = warningsToReport.filter((warning) => { + const portMeta = inputPorts.find((p: ComponentPortMetadata) => p.id === warning.target); + // If we can't find the port metadata, assume it's required to be safe + if (!portMeta) return true; + // If marked as not required, it's optional + if (portMeta.required === false) return false; + // If connectionType is 'any', it accepts undefined + if (portMeta.connectionType?.kind === 'any') return false; + return true; + }); + + // Log warnings for all undefined inputs (even optional ones) for (const warning of warningsToReport) { + const isRequired = requiredMissingInputs.some((r) => r.target === warning.target); context.trace?.record({ type: 'NODE_PROGRESS', timestamp: new Date().toISOString(), message: `Input '${warning.target}' mapped from ${warning.sourceRef}.${warning.sourceHandle} was undefined`, - level: 'warn', + level: isRequired ? 
'error' : 'warn', data: warning, }); } - if (warningsToReport.length > 0) { - const missing = warningsToReport.map((warning) => `'${warning.target}'`).join(', '); + // Only throw if there are truly missing required inputs + if (requiredMissingInputs.length > 0) { + const missing = requiredMissingInputs.map((warning) => `'${warning.target}'`).join(', '); throw new ValidationError(`Missing required inputs for ${action.ref}: ${missing}`, { fieldErrors: Object.fromEntries( - warningsToReport.map((w) => [ + requiredMissingInputs.map((w) => [ w.target, [`mapped from ${w.sourceRef}.${w.sourceHandle} was undefined`], ]), diff --git a/worker/src/temporal/types.ts b/worker/src/temporal/types.ts index 76a3a175..60c8368a 100644 --- a/worker/src/temporal/types.ts +++ b/worker/src/temporal/types.ts @@ -68,6 +68,7 @@ export interface WorkflowDefinition { export interface RunComponentActivityInput { runId: string; workflowId: string; + workflowName?: string; workflowVersionId?: string | null; organizationId?: string | null; action: { diff --git a/worker/src/temporal/workflow-runner.ts b/worker/src/temporal/workflow-runner.ts index ab5cef3e..166e99fc 100644 --- a/worker/src/temporal/workflow-runner.ts +++ b/worker/src/temporal/workflow-runner.ts @@ -304,6 +304,9 @@ export async function executeWorkflow( artifacts: scopedArtifacts, trace: options.trace, logCollector: forwardLog, + workflowId: options.workflowId, + workflowName: definition.title, + organizationId: options.organizationId, }); try { diff --git a/worker/src/temporal/workflows/index.ts b/worker/src/temporal/workflows/index.ts index 7db6dea8..4534fd8d 100644 --- a/worker/src/temporal/workflows/index.ts +++ b/worker/src/temporal/workflows/index.ts @@ -499,6 +499,7 @@ export async function shipsecWorkflowRun( const activityInput: RunComponentActivityInput = { runId: input.runId, workflowId: input.workflowId, + workflowName: input.definition.title, workflowVersionId: input.workflowVersionId ?? null, organizationId: input.organizationId ?? 
null, action: { diff --git a/worker/src/utils/opensearch-indexer.ts b/worker/src/utils/opensearch-indexer.ts new file mode 100644 index 00000000..4dfa51e4 --- /dev/null +++ b/worker/src/utils/opensearch-indexer.ts @@ -0,0 +1,374 @@ +import { Client } from '@opensearch-project/opensearch'; +import type { IScopedTraceService } from '@shipsec/component-sdk'; + +interface IndexOptions { + workflowId: string; + workflowName: string; + runId: string; + nodeRef: string; + componentId: string; + assetKeyField?: string; + indexSuffix?: string; + trace?: IScopedTraceService; +} + +/** + * Retry helper with exponential backoff + * Attempts: 3, delays: 1s, 2s, 4s + */ +async function retryWithBackoff(operation: () => Promise, operationName: string): Promise { + const maxAttempts = 3; + const delays = [1000, 2000, 4000]; // milliseconds + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + try { + return await operation(); + } catch (error) { + const isLastAttempt = attempt === maxAttempts - 1; + + if (isLastAttempt) { + throw error; // Re-throw on last attempt + } + + const delay = delays[attempt]; + console.warn( + `[OpenSearchIndexer] ${operationName} failed (attempt ${attempt + 1}/${maxAttempts}), ` + + `retrying in ${delay}ms...`, + error, + ); + + await new Promise((resolve) => setTimeout(resolve, delay)); + } + } + + // This should never be reached, but TypeScript requires it + throw new Error(`${operationName} failed after ${maxAttempts} attempts`); +} + +export class OpenSearchIndexer { + private client: Client | null = null; + private enabled = false; + + constructor() { + const url = process.env.OPENSEARCH_URL; + const username = process.env.OPENSEARCH_USERNAME; + const password = process.env.OPENSEARCH_PASSWORD; + + if (url) { + try { + this.client = new Client({ + node: url, + ...(username && + password && { + auth: { + username, + password, + }, + }), + ssl: { + rejectUnauthorized: process.env.NODE_ENV === 'production', + }, + }); + this.enabled = true; + console.log('[OpenSearchIndexer] Client initialized'); + } catch (error) { + console.warn('[OpenSearchIndexer] Failed to initialize client:', error); + } + } else { + console.debug('[OpenSearchIndexer] OpenSearch URL not configured, indexing disabled'); + } + } + + isEnabled(): boolean { + return this.enabled && this.client !== null; + } + + /** + * Serialize nested objects and arrays to JSON strings to prevent field explosion. + * Preserves primitive values (string, number, boolean, null) as-is. + */ + private serializeNestedFields(document: Record): Record { + const result: Record = {}; + + for (const [key, value] of Object.entries(document)) { + if (value === null || value === undefined) { + result[key] = value; + } else if (typeof value === 'object') { + // Serialize objects and arrays to JSON strings + result[key] = JSON.stringify(value); + } else { + // Preserve primitives (string, number, boolean) + result[key] = value; + } + } + + return result; + } + + /** + * Build the enriched document structure with _shipsec context. 
+ * - Component data fields at root level (nested objects serialized) + * - Workflow context under _shipsec namespace (prevents field collision) + */ + private buildEnrichedDocument( + document: Record, + options: IndexOptions, + orgId: string, + timestamp: string, + assetKey: string | null, + ): Record { + // Serialize nested objects in the document to prevent field explosion + const serializedDocument = this.serializeNestedFields(document); + + return { + // Component data at root level (serialized) + ...serializedDocument, + + // Workflow context under shipsec namespace (no underscore prefix for UI visibility) + shipsec: { + organization_id: orgId, + run_id: options.runId, + workflow_id: options.workflowId, + workflow_name: options.workflowName, + component_id: options.componentId, + node_ref: options.nodeRef, + ...(assetKey && { asset_key: assetKey }), + }, + + // Standard timestamp + '@timestamp': timestamp, + }; + } + + async indexDocument( + orgId: string, + document: Record, + options: IndexOptions, + ): Promise { + if (!this.isEnabled() || !this.client) { + console.debug('[OpenSearchIndexer] Indexing skipped, client not enabled'); + throw new Error('OpenSearch client not enabled'); + } + + const indexName = this.buildIndexName(orgId, options.indexSuffix); + const assetKey = this.detectAssetKey(document, options.assetKeyField); + const timestamp = new Date().toISOString(); + + const enrichedDocument = this.buildEnrichedDocument( + document, + options, + orgId, + timestamp, + assetKey, + ); + + try { + await retryWithBackoff(async () => { + await this.client!.index({ + index: indexName, + body: enrichedDocument, + }); + }, `Index document to ${indexName}`); + + console.debug(`[OpenSearchIndexer] Indexed document to ${indexName}`); + + // Log successful indexing to trace + if (options.trace) { + options.trace.record({ + type: 'NODE_PROGRESS', + level: 'info', + message: `Successfully indexed 1 document to ${indexName}`, + data: { + indexName, + documentCount: 1, + assetKey: assetKey ?? undefined, + }, + }); + } + + return indexName; + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + console.error(`[OpenSearchIndexer] Failed to index document after retries:`, error); + + // Log indexing error to trace + if (options.trace) { + options.trace.record({ + type: 'NODE_PROGRESS', + level: 'error', + message: `Failed to index document to ${indexName}`, + error: errorMessage, + data: { + indexName, + documentCount: 1, + }, + }); + } + + throw error; + } + } + + async bulkIndex( + orgId: string, + documents: Record[], + options: IndexOptions, + ): Promise<{ indexName: string; documentCount: number }> { + if (!this.isEnabled() || !this.client) { + console.debug('[OpenSearchIndexer] Bulk indexing skipped, client not enabled'); + throw new Error('OpenSearch client not enabled'); + } + + if (documents.length === 0) { + console.debug('[OpenSearchIndexer] No documents to index'); + return { indexName: '', documentCount: 0 }; + } + + const indexName = this.buildIndexName(orgId, options.indexSuffix); + + // Use same timestamp for all documents in this batch + // (they all came from the same component execution) + const timestamp = new Date().toISOString(); + + // Build bulk operations array + const bulkOps: any[] = []; + for (const document of documents) { + const assetKey = this.detectAssetKey(document, options.assetKeyField); + + const enrichedDocument = this.buildEnrichedDocument( + document, + options, + orgId, + timestamp, + assetKey, + ); + + bulkOps.push({ index: { _index: indexName } }); + bulkOps.push(enrichedDocument); + } + + try { + const response = await retryWithBackoff(async () => { + return await this.client!.bulk({ + body: bulkOps, + }); + }, `Bulk index ${documents.length} documents to ${indexName}`); + + if (response.body.errors) { + const failedItems = response.body.items.filter((item: any) => item.index?.error); + const errorCount = failedItems.length; + + // Log first 3 error details for debugging + const errorSamples = failedItems.slice(0, 3).map((item: any) => ({ + type: item.index?.error?.type, + reason: item.index?.error?.reason, + })); + + console.warn( + `[OpenSearchIndexer] Bulk indexing completed with ${errorCount} errors out of ${documents.length} documents`, + ); + console.warn(`[OpenSearchIndexer] Error samples:`, JSON.stringify(errorSamples, null, 2)); + + // Log partial failure to trace + if (options.trace) { + options.trace.record({ + type: 'NODE_PROGRESS', + level: 'warn', + message: `Bulk indexed with ${errorCount} errors out of ${documents.length} documents to ${indexName}`, + data: { + indexName, + documentCount: documents.length, + errorCount, + errorSamples, + }, + }); + } + } else { + console.debug( + `[OpenSearchIndexer] Bulk indexed ${documents.length} documents to ${indexName}`, + ); + + // Log successful bulk indexing to trace + if (options.trace) { + options.trace.record({ + type: 'NODE_PROGRESS', + level: 'info', + message: `Successfully bulk indexed ${documents.length} documents to ${indexName}`, + data: { + indexName, + documentCount: documents.length, + }, + }); + } + } + + return { indexName, documentCount: documents.length }; + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + console.error(`[OpenSearchIndexer] Failed to bulk index after retries:`, error); + + // Log bulk indexing error to trace + if (options.trace) { + options.trace.record({ + type: 'NODE_PROGRESS', + level: 'error', + message: `Failed to bulk index ${documents.length} documents to ${indexName}`, + error: errorMessage, + data: { + indexName, + documentCount: documents.length, + }, + }); + } + + throw error; + } + } + + private buildIndexName(orgId: string, indexSuffix?: string): string { + const date = new Date(); + const year = date.getFullYear(); + const month = String(date.getMonth() + 1).padStart(2, '0'); + const day = String(date.getDate()).padStart(2, '0'); + + const suffix = indexSuffix || `${year}.${month}.${day}`; + return `security-findings-${orgId}-${suffix}`; + } + + private detectAssetKey(document: Record, explicitField?: string): string | null { + // If explicit field is provided, use it + if (explicitField && document[explicitField]) { + return String(document[explicitField]); + } + + // Auto-detect from common fields + const assetFields = [ + 'asset_key', + 'host', + 'domain', + 'subdomain', + 'url', + 'ip', + 'asset', + 'target', + ]; + + for (const field of assetFields) { + if (document[field]) { + return String(document[field]); + } + } + + return null; + } +} + +// Singleton instance +let indexerInstance: OpenSearchIndexer | null = null; + +export function getOpenSearchIndexer(): OpenSearchIndexer { + if (!indexerInstance) { + indexerInstance = new OpenSearchIndexer(); + } + return indexerInstance; +} From 1d496e0d99e21c492b86c99133581c348ba66d38 Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Thu, 29 Jan 2026 00:23:03 -0500 Subject: [PATCH 07/14] feat(analytics): enhance Security Analytics with dynamic inputs and auto-refresh - Add dynamic inputs editor with auto-populated source tags from workflow - Add results port to all security components for analytics output - Fix Data Explorer URL format to preserve time filter - Hide View Analytics button during running workflows - Auto-refresh OpenSearch index patterns after bulk indexing - Add OPENSEARCH_DASHBOARDS_URL env var for worker configuration Signed-off-by: Aseem Shrey --- docker/docker-compose.full.yml | 1 + frontend/src/components/layout/TopBar.tsx | 20 +- .../workflow/AnalyticsInputsEditor.tsx | 196 ++++++++++++++++++ .../workflow-builder/WorkflowBuilder.tsx | 1 + worker/.env.example | 5 + worker/src/components/core/analytics-sink.ts | 170 ++++++++++++--- .../security/__tests__/dnsx.test.ts | 6 +- .../security/__tests__/httpx.test.ts | 3 +- worker/src/components/security/abuseipdb.ts | 43 +++- worker/src/components/security/amass.ts | 19 ++ worker/src/components/security/dnsx.ts | 68 +++++- worker/src/components/security/httpx.ts | 28 ++- worker/src/components/security/naabu.ts | 23 ++ .../src/components/security/prowler-scan.ts | 53 +++++ .../components/security/shuffledns-massdns.ts | 37 +++- worker/src/components/security/subfinder.ts | 20 ++ worker/src/components/security/virustotal.ts | 39 ++++ worker/src/utils/opensearch-indexer.ts | 106 ++++++++-- 18 files changed, 770 insertions(+), 68 deletions(-) create mode 100644 frontend/src/components/workflow/AnalyticsInputsEditor.tsx diff --git a/docker/docker-compose.full.yml b/docker/docker-compose.full.yml index 5fe42d88..3cd0b4a1 100644 --- a/docker/docker-compose.full.yml +++ b/docker/docker-compose.full.yml @@ -338,6 +338,7 @@ services: - EVENT_KAFKA_CLIENT_ID=shipsec-worker-events # OpenSearch for Analytics Sink - 
OPENSEARCH_URL=http://opensearch:9200 + - OPENSEARCH_DASHBOARDS_URL=http://opensearch-dashboards:5601/analytics depends_on: postgres: condition: service_healthy diff --git a/frontend/src/components/layout/TopBar.tsx b/frontend/src/components/layout/TopBar.tsx index acbeb387..7b788bf9 100644 --- a/frontend/src/components/layout/TopBar.tsx +++ b/frontend/src/components/layout/TopBar.tsx @@ -32,6 +32,7 @@ import { env } from '@/config/env'; interface TopBarProps { workflowId?: string; selectedRunId?: string | null; + selectedRunStatus?: string | null; isNew?: boolean; onRun?: () => void; onSave: () => Promise | void; @@ -49,6 +50,7 @@ const DEFAULT_WORKFLOW_NAME = 'Untitled Workflow'; export function TopBar({ workflowId, selectedRunId, + selectedRunStatus, onRun, onSave, onImport, @@ -456,7 +458,9 @@ export function TopBar({ )} - {env.VITE_OPENSEARCH_DASHBOARDS_URL && workflowId && ( + {env.VITE_OPENSEARCH_DASHBOARDS_URL && + workflowId && + (!selectedRunId || (selectedRunStatus && selectedRunStatus !== 'RUNNING')) && ( + + + {inputs.length === 0 ? ( +
+

No data inputs configured

+

+ Configure input ports to receive analytics results from different scanner components. + Each input creates a corresponding input port on this node. +

+ +
+ ) : ( +
+ {inputs.map((input, index) => ( +
+ {/* Header with drag handle and delete */} +
+ + Input {index + 1} + +
+ + {/* ID Field */} +
+ + updateInput(index, 'id', e.target.value)} + placeholder="e.g., nucleiResults" + className="h-8 text-xs font-mono" + /> +

+ Unique identifier (becomes input port ID) +

+
+ + {/* Label Field */} +
+ + updateInput(index, 'label', e.target.value)} + placeholder="e.g., Nuclei Results" + className="h-8 text-xs" + /> +

Display name in workflow editor

+
+ + {/* Source Tag Field */} +
+ + updateInput(index, 'sourceTag', e.target.value)} + placeholder="e.g., nuclei-scan" + className="h-8 text-xs font-mono" + /> +

+ Added to indexed documents as 'source_input' for filtering in dashboards + (optional) +

+
+ + {/* Input Port Preview */} +
+
+
+ + Input port: {input.id} + +
+
+
+ ))} +
+ )} + + {/* Summary */} + {inputs.length > 0 && ( +
+

+ {inputs.length} data input + {inputs.length !== 1 ? 's' : ''} configured +

+

+ {inputs.length} input port{inputs.length !== 1 ? 's' : ''} will be created on this node +

+
+ )} + + ); +} diff --git a/frontend/src/features/workflow-builder/WorkflowBuilder.tsx b/frontend/src/features/workflow-builder/WorkflowBuilder.tsx index 238db5cf..daf459db 100644 --- a/frontend/src/features/workflow-builder/WorkflowBuilder.tsx +++ b/frontend/src/features/workflow-builder/WorkflowBuilder.tsx @@ -901,6 +901,7 @@ function WorkflowBuilderContent() { ; + +// Base input schema - will be extended by resolvePorts +const baseInputSchema = inputs({}); + const outputSchema = outputs({ indexed: port(z.boolean(), { label: 'Indexed', @@ -34,6 +43,19 @@ const outputSchema = outputs({ }); const parameterSchema = parameters({ + dataInputs: param( + z + .array(dataInputDefinitionSchema) + .default([{ id: 'input1', label: 'Input 1', sourceTag: 'input_1' }]) + .describe('Define multiple data inputs from different scanner components'), + { + label: 'Data Inputs', + editor: 'analytics-inputs', + description: + 'Configure input ports for different scanner results. Each input creates a corresponding input port.', + helpText: 'Each input accepts AnalyticsResult[] and can be tagged for filtering in dashboards.', + }, + ), indexSuffix: param( z .string() @@ -104,13 +126,13 @@ const parameterSchema = parameters({ .boolean() .default(false) .describe( - 'Whether to fail the workflow if indexing fails. Default is false (fire-and-forget).', + 'Strict mode: requires all configured inputs to have data and validates all documents before indexing. Default is lenient (fire-and-forget).', ), { - label: 'Fail workflow if indexing fails', + label: 'Strict Mode (Fail on Error)', editor: 'boolean', description: - "When enabled, the workflow will stop if indexing to OpenSearch fails. By default, indexing errors are logged but don't stop the workflow.", + 'When enabled: requires ALL configured inputs to have data, validates ALL documents before indexing, and fails the workflow if any check fails. When disabled: skips missing inputs and logs errors without failing.', }, ), }); @@ -120,17 +142,17 @@ const definition = defineComponent({ label: 'Analytics Sink', category: 'output', runner: { kind: 'inline' }, - inputs: inputSchema, + inputs: baseInputSchema, outputs: outputSchema, parameters: parameterSchema, - docs: 'Indexes structured analytics results into OpenSearch for dashboards, queries, and alerts. Requires results to follow the `core.analytics.result.v1` contract with scanner, finding_hash, and severity fields. Connect the `results` port from scanner components. Each array item becomes a separate document with workflow context stored under `shipsec.*`. Indexing is fire-and-forget by default.', + docs: 'Indexes structured analytics results into OpenSearch for dashboards, queries, and alerts. Configure multiple data inputs to aggregate results from different scanner components. Each input can be tagged with a sourceTag for filtering in dashboards. 
Supports lenient (fire-and-forget) and strict (all-or-nothing) modes via the failOnError parameter.', ui: { slug: 'analytics-sink', - version: '1.0.0', + version: '2.0.0', type: 'output', category: 'output', description: - 'Index security findings and workflow outputs into OpenSearch for analytics, dashboards, and alerting.', + 'Index security findings from multiple scanners into OpenSearch for analytics, dashboards, and alerting.', icon: 'BarChart3', author: { name: 'ShipSecAI', @@ -139,16 +161,49 @@ const definition = defineComponent({ isLatest: true, deprecated: false, examples: [ + 'Aggregate findings from Nuclei, Subfinder, and Prowler into a unified security dashboard.', 'Index subdomain enumeration results for tracking asset discovery over time.', 'Store vulnerability scan findings for correlation and trend analysis.', - 'Aggregate security metrics across multiple workflows into unified dashboards.', ], }, + resolvePorts(params: z.infer) { + const dataInputs = Array.isArray(params.dataInputs) ? params.dataInputs : []; + + const inputShape: Record = {}; + + // Create dynamic input ports from dataInputs parameter + for (const input of dataInputs) { + const id = typeof input?.id === 'string' ? input.id.trim() : ''; + if (!id) { + continue; + } + + const label = typeof input?.label === 'string' ? input.label : id; + const sourceTag = typeof input?.sourceTag === 'string' ? input.sourceTag : undefined; + + const description = sourceTag + ? `Analytics results tagged with '${sourceTag}' in indexed documents.` + : `Analytics results from ${label}.`; + + // Each input port accepts an optional array of analytics results + inputShape[id] = withPortMeta(z.array(analyticsResultSchema()).optional(), { + label, + description, + }); + } + + return { + inputs: inputs(inputShape), + outputs: outputSchema, + }; + }, async execute({ inputs, params }, context) { const { getOpenSearchIndexer } = await import('../../utils/opensearch-indexer'); const indexer = getOpenSearchIndexer(); - const documentCount = inputs.data.length; + const dataInputsMap = new Map( + (params.dataInputs ?? []).map((d) => [d.id, d]), + ); // Check if indexing is enabled if (!indexer.isEnabled()) { @@ -157,7 +212,7 @@ const definition = defineComponent({ ); return { indexed: false, - documentCount, + documentCount: 0, indexName: '', }; } @@ -178,32 +233,87 @@ const definition = defineComponent({ }; } - // Runtime validation of analytics result contract - const validated = z.array(analyticsResultSchema()).safeParse(inputs.data); - if (!validated.success) { - const errorMessage = `Invalid analytics results format: ${validated.error.message}`; - context.logger.error(`[Analytics Sink] ${errorMessage}`); - if (params.failOnError) { - throw new Error(errorMessage); + // STRICT MODE: Require all configured inputs to be present + if (params.failOnError) { + for (const inputDef of params.dataInputs ?? []) { + const inputData = (inputs as Record)[inputDef.id]; + if (!inputData || !Array.isArray(inputData) || inputData.length === 0) { + throw new ValidationError( + `Required input '${inputDef.label}' (${inputDef.id}) is missing or empty. 
` + + `All configured inputs must provide data when strict mode is enabled.`, + { + fieldErrors: { [inputDef.id]: ['This input is required but has no data'] }, + }, + ); + } + } + } + + // Aggregate all documents from all inputs + const allDocuments: Array> = []; + const inputsRecord = inputs as Record; + + for (const [inputId, inputData] of Object.entries(inputsRecord)) { + if (!inputData || !Array.isArray(inputData)) { + if (!params.failOnError) { + context.logger.warn(`[Analytics Sink] Input '${inputId}' is empty or undefined, skipping`); + } + continue; } + + const inputDef = dataInputsMap.get(inputId); + const sourceTag = inputDef?.sourceTag; + + for (const doc of inputData) { + // STRICT MODE: Validate each document against analytics schema + if (params.failOnError) { + const validated = analyticsResultSchema().safeParse(doc); + if (!validated.success) { + throw new ValidationError( + `Document from input '${inputDef?.label ?? inputId}' failed validation: ${validated.error.message}`, + { + fieldErrors: { [inputId]: [validated.error.message] }, + }, + ); + } + } + + // Add source_input field if sourceTag is defined + const enrichedDoc = sourceTag ? { ...doc, source_input: sourceTag } : { ...doc }; + allDocuments.push(enrichedDoc); + } + } + + const documentCount = allDocuments.length; + + if (documentCount === 0) { + context.logger.info('[Analytics Sink] No documents to index from any input'); return { indexed: false, - documentCount, + documentCount: 0, indexName: '', }; } + // LENIENT MODE: Validate all documents (but don't fail, just log warnings) + if (!params.failOnError) { + const validated = z.array(analyticsResultSchema()).safeParse(allDocuments); + if (!validated.success) { + context.logger.warn( + `[Analytics Sink] Some documents have validation issues: ${validated.error.message}`, + ); + // Continue anyway in lenient mode + } + } + try { // Determine the actual asset key field to use let assetKeyField: string | undefined; if (params.assetKeyField === 'auto') { - // Auto-detect mode: let the indexer determine the asset key field assetKeyField = undefined; } else if (params.assetKeyField === 'custom') { - // Custom mode: use the custom field name if provided assetKeyField = params.customAssetKeyField; } else { - // Specific field selected assetKeyField = params.assetKeyField; } @@ -218,8 +328,10 @@ const definition = defineComponent({ trace: context.trace, }; - context.logger.info(`[Analytics Sink] Bulk indexing ${documentCount} documents`); - const result = await indexer.bulkIndex(context.organizationId, validated.data, indexOptions); + context.logger.info( + `[Analytics Sink] Bulk indexing ${documentCount} documents from ${dataInputsMap.size} input(s)`, + ); + const result = await indexer.bulkIndex(context.organizationId, allDocuments, indexOptions); context.logger.info( `[Analytics Sink] Successfully indexed ${result.documentCount} document(s) to ${result.indexName}`, diff --git a/worker/src/components/security/__tests__/dnsx.test.ts b/worker/src/components/security/__tests__/dnsx.test.ts index 029e9368..14ca6e17 100644 --- a/worker/src/components/security/__tests__/dnsx.test.ts +++ b/worker/src/components/security/__tests__/dnsx.test.ts @@ -82,9 +82,9 @@ describe('dnsx component', () => { expect(result.domainCount).toBe(1); expect(result.recordCount).toBe(2); - expect(result.results).toHaveLength(2); - expect(result.results[0].host).toBe('example.com'); - const aggregatedAnswers = result.results.flatMap((entry) => entry.answers.a ?? 
[]); + expect(result.dnsRecords).toHaveLength(2); + expect((result.dnsRecords[0] as { host: string }).host).toBe('example.com'); + const aggregatedAnswers = result.dnsRecords.flatMap((entry) => (entry as { answers: { a?: string[] } }).answers.a ?? []); expect(aggregatedAnswers).toEqual(['23.215.0.138', '23.215.0.136']); expect(result.recordTypes).toEqual(['A']); expect(result.resolvedHosts).toEqual(['example.com']); diff --git a/worker/src/components/security/__tests__/httpx.test.ts b/worker/src/components/security/__tests__/httpx.test.ts index 83282c84..8f7f45f8 100644 --- a/worker/src/components/security/__tests__/httpx.test.ts +++ b/worker/src/components/security/__tests__/httpx.test.ts @@ -85,7 +85,7 @@ describeHttpx('httpx component', () => { }); const payload: HttpxOutput = { - results: [ + responses: [ { url: 'https://example.com', host: 'example.com', @@ -105,6 +105,7 @@ describeHttpx('httpx component', () => { timestamp: '2023-01-01T00:00:00Z', }, ], + results: [], rawOutput: '{"url":"https://example.com","host":"example.com","status-code":200,"title":"Example Domain","tech":["HTTP","CDN"]}', targetCount: 1, diff --git a/worker/src/components/security/abuseipdb.ts b/worker/src/components/security/abuseipdb.ts index 81802044..1b06573a 100644 --- a/worker/src/components/security/abuseipdb.ts +++ b/worker/src/components/security/abuseipdb.ts @@ -13,6 +13,9 @@ import { param, coerceBooleanFromText, coerceNumberFromText, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; const inputSchema = inputs({ @@ -102,6 +105,11 @@ const outputSchema = outputs({ reason: 'Full AbuseIPDB response payload varies by plan and API version.', connectionType: { kind: 'primitive', name: 'json' }, }), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. 
Connect to Analytics Sink.', + }), }); const abuseIPDBRetryPolicy: ComponentRetryPolicy = { @@ -173,6 +181,7 @@ const definition = defineComponent({ context.logger.warn(`[AbuseIPDB] IP not found: ${ipAddress}`); return { ipAddress, + results: [], abuseConfidenceScore: 0, full_report: { error: 'Not Found' }, }; @@ -186,14 +195,44 @@ const definition = defineComponent({ const data = (await response.json()) as Record; const info = (data.data || {}) as Record; - context.logger.info(`[AbuseIPDB] Score for ${ipAddress}: ${info.abuseConfidenceScore}`); + const abuseConfidenceScore = info.abuseConfidenceScore as number; + + context.logger.info(`[AbuseIPDB] Score for ${ipAddress}: ${abuseConfidenceScore}`); + + // Determine severity based on abuse confidence score + let severity: 'critical' | 'high' | 'medium' | 'low' | 'info' | 'none' = 'none'; + if (abuseConfidenceScore >= 90) { + severity = 'critical'; + } else if (abuseConfidenceScore >= 70) { + severity = 'high'; + } else if (abuseConfidenceScore >= 50) { + severity = 'medium'; + } else if (abuseConfidenceScore >= 25) { + severity = 'low'; + } else if (abuseConfidenceScore > 0) { + severity = 'info'; + } + + // Build analytics-ready results + const analyticsResults: AnalyticsResult[] = [{ + scanner: 'abuseipdb', + finding_hash: generateFindingHash('ip-reputation', ipAddress, String(abuseConfidenceScore)), + severity, + asset_key: ipAddress, + ip_address: ipAddress, + abuse_confidence_score: abuseConfidenceScore, + country_code: info.countryCode as string | undefined, + isp: info.isp as string | undefined, + total_reports: info.totalReports as number | undefined, + }]; return { ipAddress: info.ipAddress as string, + results: analyticsResults, isPublic: info.isPublic as boolean | undefined, ipVersion: info.ipVersion as number | undefined, isWhitelisted: info.isWhitelisted as boolean | undefined, - abuseConfidenceScore: info.abuseConfidenceScore as number, + abuseConfidenceScore, countryCode: info.countryCode as string | undefined, usageType: info.usageType as string | undefined, isp: info.isp as string | undefined, diff --git a/worker/src/components/security/amass.ts b/worker/src/components/security/amass.ts index b0029aed..11e6c8ec 100644 --- a/worker/src/components/security/amass.ts +++ b/worker/src/components/security/amass.ts @@ -11,6 +11,9 @@ import { port, param, type DockerRunnerConfig, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; @@ -278,6 +281,11 @@ const outputSchema = outputs({ connectionType: { kind: 'primitive', name: 'json' }, }, ), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. 
Connect to Analytics Sink.', + }), }); // Split custom CLI flags into an array of arguments @@ -714,12 +722,23 @@ const definition = defineComponent({ }); } + // Build analytics-ready results with scanner metadata + const analyticsResults: AnalyticsResult[] = subdomains.map((subdomain) => ({ + scanner: 'amass', + finding_hash: generateFindingHash('subdomain-discovery', subdomain, inputs.domains.join(',')), + severity: 'info' as const, + asset_key: subdomain, + subdomain, + parent_domains: inputs.domains, + })); + return { subdomains, rawOutput, domainCount, subdomainCount, options: optionsSummary, + results: analyticsResults, }; }, }); diff --git a/worker/src/components/security/dnsx.ts b/worker/src/components/security/dnsx.ts index 83d82ce7..40164a05 100644 --- a/worker/src/components/security/dnsx.ts +++ b/worker/src/components/security/dnsx.ts @@ -11,6 +11,9 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; @@ -235,8 +238,8 @@ const dnsxLineSchema = z .passthrough(); const outputSchema = outputs({ - results: port(z.array(z.any()), { - label: 'Results', + dnsRecords: port(z.array(z.any()), { + label: 'DNS Records', description: 'DNS resolution results returned by dnsx.', allowAny: true, reason: 'dnsx returns heterogeneous record payloads.', @@ -270,6 +273,11 @@ const outputSchema = outputs({ label: 'Errors', description: 'Errors encountered during dnsx execution.', }), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. Connect to Analytics Sink.', + }), }); const splitCliArgs = (input: string): string[] => { @@ -562,6 +570,7 @@ const definition = defineComponent({ if (domainCount === 0) { context.logger.info('[DNSX] Skipping dnsx execution because no domains were provided.'); return outputSchema.parse({ + dnsRecords: [], results: [], rawOutput: '', domainCount: 0, @@ -766,8 +775,20 @@ const definition = defineComponent({ .filter((host): host is string => typeof host === 'string' && host.length > 0), ); + // Build analytics-ready results with scanner metadata + const analyticsResults: AnalyticsResult[] = normalisedRecords.map((record) => ({ + scanner: 'dnsx', + finding_hash: generateFindingHash('dns-resolution', record.host, JSON.stringify(record.answers)), + severity: 'info' as const, + asset_key: record.host, + host: record.host, + record_types: Object.keys(record.answers), + answers: record.answers, + })); + return { - results: normalisedRecords, + dnsRecords: normalisedRecords, + results: analyticsResults, rawOutput: params.rawOutput, domainCount: params.domainCount, recordCount: params.recordCount, @@ -806,6 +827,7 @@ const definition = defineComponent({ if (trimmed.length === 0) { return { + dnsRecords: [], results: [], rawOutput, domainCount: domainCount, @@ -830,6 +852,7 @@ const definition = defineComponent({ ? (record.domainCount as number) : domainCount; return { + dnsRecords: [], results: [], rawOutput: trimmed, domainCount: errorDomainCount, @@ -844,10 +867,10 @@ const definition = defineComponent({ const validated = outputSchema.safeParse(record); if (validated.success) { return buildOutput({ - records: validated.data.results as z.infer[], + records: validated.data.dnsRecords as z.infer[], rawOutput: validated.data.rawOutput ?? rawOutput, domainCount: validated.data.domainCount ?? 
domainCount, - recordCount: validated.data.recordCount ?? validated.data.results.length, + recordCount: validated.data.recordCount ?? validated.data.dnsRecords.length, recordTypes: validated.data.recordTypes ?? recordTypes, resolvers: validated.data.resolvers ?? resolverList, errors: validated.data.errors, @@ -865,6 +888,7 @@ const definition = defineComponent({ if (lines.length === 0) { return { + dnsRecords: [], results: [], rawOutput, domainCount: domainCount, @@ -891,8 +915,20 @@ const definition = defineComponent({ }; }); + // Build analytics-ready results + const analyticsResults: AnalyticsResult[] = silentRecords.map((record) => ({ + scanner: 'dnsx', + finding_hash: generateFindingHash('dns-resolution', record.host, JSON.stringify(record.answers)), + severity: 'info' as const, + asset_key: record.host, + host: record.host, + record_types: Object.keys(record.answers), + answers: record.answers, + })); + return { - results: silentRecords, + dnsRecords: silentRecords, + results: analyticsResults, rawOutput, domainCount: domainCount, recordCount: silentRecords.length, @@ -915,6 +951,7 @@ const definition = defineComponent({ if (trimmed.length === 0) { return { + dnsRecords: [], results: [], rawOutput, domainCount: domainCount, @@ -971,8 +1008,20 @@ const definition = defineComponent({ }; }); + // Build analytics-ready results + const analyticsResults: AnalyticsResult[] = fallbackResults.map((record) => ({ + scanner: 'dnsx', + finding_hash: generateFindingHash('dns-resolution', record.host, JSON.stringify(record.answers)), + severity: 'info' as const, + asset_key: record.host, + host: record.host, + record_types: Object.keys(record.answers), + answers: record.answers, + })); + return { - results: fallbackResults, + dnsRecords: fallbackResults, + results: analyticsResults, rawOutput, domainCount: domainCount, recordCount: fallbackResults.length, @@ -1012,6 +1061,7 @@ const definition = defineComponent({ : JSON.stringify(rawPayload, null, 2).slice(0, 5000); return { + dnsRecords: [], results: [], rawOutput, domainCount: domainCount, @@ -1024,10 +1074,10 @@ const definition = defineComponent({ } return buildOutput({ - records: safeResult.data.results as z.infer[], + records: safeResult.data.dnsRecords as z.infer[], rawOutput: safeResult.data.rawOutput, domainCount: safeResult.data.domainCount ?? domainCount, - recordCount: safeResult.data.recordCount ?? safeResult.data.results.length, + recordCount: safeResult.data.recordCount ?? 
safeResult.data.dnsRecords.length, recordTypes: safeResult.data.recordTypes, resolvers: safeResult.data.resolvers, errors: safeResult.data.errors, diff --git a/worker/src/components/security/httpx.ts b/worker/src/components/security/httpx.ts index 200f8aa4..01cad283 100644 --- a/worker/src/components/security/httpx.ts +++ b/worker/src/components/security/httpx.ts @@ -10,6 +10,9 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; @@ -145,7 +148,7 @@ const findingSchema = z.object({ type Finding = z.infer; const outputSchema = outputs({ - results: port(z.array(findingSchema), { + responses: port(z.array(findingSchema), { label: 'HTTP Responses', description: 'Structured metadata for each responsive endpoint.', connectionType: { kind: 'list', element: { kind: 'primitive', name: 'json' } }, @@ -178,6 +181,11 @@ const outputSchema = outputs({ connectionType: { kind: 'primitive', name: 'json' }, }, ), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. Connect to Analytics Sink.', + }), }); const httpxRunnerOutputSchema = z.object({ @@ -268,6 +276,7 @@ const definition = defineComponent({ if (runnerParams.targets.length === 0) { context.logger.info('[httpx] Skipping httpx probe because no targets were provided.'); const emptyOutput: Output = { + responses: [], results: [], rawOutput: '', targetCount: 0, @@ -391,8 +400,23 @@ const definition = defineComponent({ `[httpx] Completed probe with ${findings.length} result(s) from ${runnerParams.targets.length} target(s)`, ); + // Build analytics-ready results with scanner metadata + const analyticsResults: AnalyticsResult[] = findings.map((finding) => ({ + scanner: 'httpx', + finding_hash: generateFindingHash('http-endpoint', finding.url, String(finding.statusCode ?? 0)), + severity: 'info' as const, + asset_key: finding.url, + url: finding.url, + host: finding.host, + status_code: finding.statusCode, + title: finding.title, + webserver: finding.webserver, + technologies: finding.technologies, + })); + const output: Output = { - results: findings, + responses: findings, + results: analyticsResults, rawOutput: runnerOutput, targetCount: runnerParams.targets.length, resultCount: findings.length, diff --git a/worker/src/components/security/naabu.ts b/worker/src/components/security/naabu.ts index f7ac5a2a..ebec7759 100644 --- a/worker/src/components/security/naabu.ts +++ b/worker/src/components/security/naabu.ts @@ -8,6 +8,9 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; const inputSchema = inputs({ @@ -160,6 +163,11 @@ const outputSchema = outputs({ connectionType: { kind: 'primitive', name: 'json' }, }, ), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. 
Connect to Analytics Sink.', + }), }); type Finding = z.infer; @@ -334,8 +342,22 @@ eval "$CMD" if (typeof result === 'string') { const findings = parseNaabuOutput(result); + + // Build analytics-ready results with scanner metadata + const analyticsResults: AnalyticsResult[] = findings.map((finding) => ({ + scanner: 'naabu', + finding_hash: generateFindingHash('open-port', finding.host, String(finding.port)), + severity: 'info' as const, + asset_key: `${finding.host}:${finding.port}`, + host: finding.host, + port: finding.port, + protocol: finding.protocol, + ip: finding.ip, + })); + const output: Output = { findings, + results: analyticsResults, rawOutput: result, targetCount: runnerParams.targets.length, openPortCount: findings.length, @@ -361,6 +383,7 @@ eval "$CMD" return { findings: [], + results: [], rawOutput: typeof result === 'string' ? result : '', targetCount: runnerParams.targets.length, openPortCount: 0, diff --git a/worker/src/components/security/prowler-scan.ts b/worker/src/components/security/prowler-scan.ts index 69452a8b..155d8db3 100644 --- a/worker/src/components/security/prowler-scan.ts +++ b/worker/src/components/security/prowler-scan.ts @@ -14,6 +14,9 @@ import { parameters, port, param, + analyticsResultSchema, + generateFindingHash, + type AnalyticsResult, } from '@shipsec/component-sdk'; import type { DockerRunnerConfig } from '@shipsec/component-sdk'; @@ -247,6 +250,11 @@ const outputSchema = outputs({ 'Array of normalized findings derived from Prowler ASFF output (includes severity, resource id, remediation).', connectionType: { kind: 'list', element: { kind: 'primitive', name: 'json' } }, }), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. Connect to Analytics Sink.', + }), rawOutput: port(z.string(), { label: 'Raw Output', description: 'Raw Prowler output for debugging.', @@ -730,9 +738,29 @@ const definition = defineComponent({ const scanId = buildScanId(parsedInputs.accountId, parsedParams.scanMode); + // Build analytics-ready results (follows core.analytics.result.v1 contract) + const results: AnalyticsResult[] = findings.map((finding) => ({ + scanner: 'prowler', + finding_hash: generateFindingHash( + finding.id, + finding.resourceId ?? finding.accountId ?? '', + finding.title ?? '', + ), + severity: mapToAnalyticsSeverity(finding.severity), + asset_key: finding.resourceId ?? finding.accountId ?? undefined, + // Include additional context for analytics + title: finding.title, + description: finding.description, + region: finding.region, + status: finding.status, + remediationText: finding.remediationText, + recommendationUrl: finding.recommendationUrl, + })); + const output: Output = { scanId, findings, + results, rawOutput: rawSegments.join('\n'), summary: { totalFindings: findings.length, @@ -939,6 +967,31 @@ function extractRegionFromArn(resourceId?: string): string | null { return null; } +/** + * Maps Prowler severity levels to analytics severity enum. 
+ * Prowler: critical, high, medium, low, informational, unknown + * Analytics: critical, high, medium, low, info, none + */ +function mapToAnalyticsSeverity( + prowlerSeverity: NormalisedSeverity, +): 'critical' | 'high' | 'medium' | 'low' | 'info' | 'none' { + switch (prowlerSeverity) { + case 'critical': + return 'critical'; + case 'high': + return 'high'; + case 'medium': + return 'medium'; + case 'low': + return 'low'; + case 'informational': + return 'info'; + case 'unknown': + default: + return 'none'; + } +} + componentRegistry.register(definition); // Create local type aliases for backward compatibility diff --git a/worker/src/components/security/shuffledns-massdns.ts b/worker/src/components/security/shuffledns-massdns.ts index fff8e396..d3690bea 100644 --- a/worker/src/components/security/shuffledns-massdns.ts +++ b/worker/src/components/security/shuffledns-massdns.ts @@ -12,6 +12,9 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; @@ -162,6 +165,11 @@ const outputSchema = outputs({ label: 'Subdomain Count', description: 'Number of unique subdomains discovered.', }), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. Connect to Analytics Sink.', + }), }); const definition = defineComponent({ @@ -371,8 +379,19 @@ const definition = defineComponent({ const deduped = Array.from(new Set(subdomains)); + // Build analytics-ready results with scanner metadata + const analyticsResults: AnalyticsResult[] = deduped.map((subdomain) => ({ + scanner: 'shuffledns', + finding_hash: generateFindingHash('subdomain-discovery', subdomain, domains.join(',')), + severity: 'info' as const, + asset_key: subdomain, + subdomain, + parent_domains: domains, + })); + return outputSchema.parse({ subdomains: deduped, + results: analyticsResults, rawOutput, domainCount: domains.length, subdomainCount: deduped.length, @@ -397,17 +416,31 @@ const definition = defineComponent({ .map((line) => line.trim()) .filter((line) => line.length > 0); + const deduped = Array.from(new Set(subdomainsValue)); + + // Build analytics-ready results + const analyticsResults: AnalyticsResult[] = deduped.map((subdomain) => ({ + scanner: 'shuffledns', + finding_hash: generateFindingHash('subdomain-discovery', subdomain, domains.join(',')), + severity: 'info' as const, + asset_key: subdomain, + subdomain, + parent_domains: domains, + })); + return outputSchema.parse({ - subdomains: Array.from(new Set(subdomainsValue)), + subdomains: deduped, + results: analyticsResults, rawOutput: maybeRaw || subdomainsValue.join('\n'), domainCount: domains.length, - subdomainCount: subdomainsValue.length, + subdomainCount: deduped.length, }); } // Fallback – empty return outputSchema.parse({ subdomains: [], + results: [], rawOutput: '', domainCount: domains.length, subdomainCount: 0, diff --git a/worker/src/components/security/subfinder.ts b/worker/src/components/security/subfinder.ts index d798af6b..06b26dae 100644 --- a/worker/src/components/security/subfinder.ts +++ b/worker/src/components/security/subfinder.ts @@ -11,6 +11,9 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; @@ -123,6 +126,11 @@ const outputSchema = 
outputs({ label: 'Subdomain Count', description: 'Number of subdomains discovered.', }), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. Connect to Analytics Sink.', + }), }); // Split custom CLI flags into an array of arguments @@ -356,6 +364,7 @@ const definition = defineComponent({ context.logger.info('[Subfinder] Skipping execution because no domains were provided.'); return { subdomains: [], + results: [], rawOutput: '', domainCount: 0, subdomainCount: 0, @@ -507,11 +516,22 @@ const definition = defineComponent({ }); } + // Build analytics-ready results with scanner metadata + const analyticsResults: AnalyticsResult[] = subdomains.map((subdomain) => ({ + scanner: 'subfinder', + finding_hash: generateFindingHash('subdomain-discovery', subdomain, domains.join(',')), + severity: 'info' as const, + asset_key: subdomain, + subdomain, + parent_domains: domains, + })); + return { subdomains, rawOutput, domainCount, subdomainCount, + results: analyticsResults, }; }, }); diff --git a/worker/src/components/security/virustotal.ts b/worker/src/components/security/virustotal.ts index ae83dae3..23d0cb1b 100644 --- a/worker/src/components/security/virustotal.ts +++ b/worker/src/components/security/virustotal.ts @@ -11,6 +11,9 @@ import { parameters, port, param, + generateFindingHash, + analyticsResultSchema, + type AnalyticsResult, } from '@shipsec/component-sdk'; const inputSchema = inputs({ @@ -64,6 +67,11 @@ const outputSchema = outputs({ connectionType: { kind: 'primitive', name: 'json' }, }, ), + results: port(z.array(analyticsResultSchema()), { + label: 'Results', + description: + 'Analytics-ready findings with scanner, finding_hash, and severity. 
Connect to Analytics Sink.', + }), }); // Retry policy for VirusTotal API - handles rate limits and transient failures @@ -162,6 +170,7 @@ const definition = defineComponent({ suspicious: 0, harmless: 0, tags: [], + results: [], full_report: { error: 'Not Found in VirusTotal' }, }; } @@ -185,12 +194,42 @@ const definition = defineComponent({ `[VirusTotal] Results for ${indicator}: ${malicious} malicious, ${suspicious} suspicious.`, ); + // Determine severity based on malicious/suspicious counts + let severity: 'critical' | 'high' | 'medium' | 'low' | 'info' | 'none' = 'none'; + if (malicious >= 10) { + severity = 'critical'; + } else if (malicious >= 5) { + severity = 'high'; + } else if (malicious >= 1 || suspicious >= 5) { + severity = 'medium'; + } else if (suspicious >= 1) { + severity = 'low'; + } else { + severity = 'info'; + } + + // Build analytics-ready results + const analyticsResults: AnalyticsResult[] = [{ + scanner: 'virustotal', + finding_hash: generateFindingHash('threat-intelligence', indicator, type), + severity, + asset_key: indicator, + indicator, + indicator_type: type, + malicious_count: malicious, + suspicious_count: suspicious, + harmless_count: harmless, + reputation, + tags, + }]; + return { malicious, suspicious, harmless, tags, reputation, + results: analyticsResults, full_report: data, }; }, diff --git a/worker/src/utils/opensearch-indexer.ts b/worker/src/utils/opensearch-indexer.ts index 4dfa51e4..b4dd6525 100644 --- a/worker/src/utils/opensearch-indexer.ts +++ b/worker/src/utils/opensearch-indexer.ts @@ -48,12 +48,20 @@ async function retryWithBackoff(operation: () => Promise, operationName: s export class OpenSearchIndexer { private client: Client | null = null; private enabled = false; + private dashboardsUrl: string | null = null; + private dashboardsAuth: { username: string; password: string } | null = null; constructor() { const url = process.env.OPENSEARCH_URL; const username = process.env.OPENSEARCH_USERNAME; const password = process.env.OPENSEARCH_PASSWORD; + // OpenSearch Dashboards URL for index pattern management + this.dashboardsUrl = process.env.OPENSEARCH_DASHBOARDS_URL || null; + if (username && password) { + this.dashboardsAuth = { username, password }; + } + if (url) { try { this.client = new Client({ @@ -88,21 +96,8 @@ export class OpenSearchIndexer { * Preserves primitive values (string, number, boolean, null) as-is. */ private serializeNestedFields(document: Record): Record { - const result: Record = {}; - - for (const [key, value] of Object.entries(document)) { - if (value === null || value === undefined) { - result[key] = value; - } else if (typeof value === 'object') { - // Serialize objects and arrays to JSON strings - result[key] = JSON.stringify(value); - } else { - // Preserve primitives (string, number, boolean) - result[key] = value; - } - } - - return result; + // Pass through as-is - let OpenSearch handle dynamic mapping + return { ...document }; } /** @@ -302,6 +297,9 @@ export class OpenSearchIndexer { } } + // Refresh index pattern in OpenSearch Dashboards to make new fields visible + await this.refreshIndexPattern(); + return { indexName, documentCount: documents.length }; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); @@ -325,6 +323,84 @@ export class OpenSearchIndexer { } } + /** + * Refresh the index pattern in OpenSearch Dashboards to make new fields visible. + * Two-step process: + * 1. Get fresh field mappings from OpenSearch via _fields_for_wildcard API + * 2. 
Update the saved index pattern object with the new fields + * Fails silently if Dashboards URL is not configured or refresh fails. + */ + private async refreshIndexPattern(): Promise { + if (!this.dashboardsUrl) { + console.debug('[OpenSearchIndexer] Dashboards URL not configured, skipping index pattern refresh'); + return; + } + + const indexPatternId = 'security-findings-*'; + + try { + const headers: Record = { + 'Content-Type': 'application/json', + 'osd-xsrf': 'true', // Required by OpenSearch Dashboards + }; + + // Add basic auth if credentials are available + if (this.dashboardsAuth) { + const authString = Buffer.from( + `${this.dashboardsAuth.username}:${this.dashboardsAuth.password}`, + ).toString('base64'); + headers['Authorization'] = `Basic ${authString}`; + } + + // Step 1: Get fresh fields from OpenSearch via Dashboards API + const fieldsUrl = `${this.dashboardsUrl}/api/index_patterns/_fields_for_wildcard?pattern=${encodeURIComponent(indexPatternId)}&meta_fields=_source&meta_fields=_id&meta_fields=_type&meta_fields=_index&meta_fields=_score`; + const fieldsResponse = await fetch(fieldsUrl, { method: 'GET', headers }); + + if (!fieldsResponse.ok) { + console.warn(`[OpenSearchIndexer] Failed to get fresh fields: ${fieldsResponse.status}`); + return; + } + + const fieldsData = await fieldsResponse.json(); + const freshFields = fieldsData.fields || []; + + // Step 2: Get current index pattern to preserve other attributes + const patternUrl = `${this.dashboardsUrl}/api/saved_objects/index-pattern/${encodeURIComponent(indexPatternId)}`; + const patternResponse = await fetch(patternUrl, { method: 'GET', headers }); + + if (!patternResponse.ok) { + console.warn(`[OpenSearchIndexer] Index pattern not found: ${patternResponse.status}`); + return; + } + + const patternData = await patternResponse.json(); + + // Step 3: Update the index pattern with fresh fields + // Include version for optimistic concurrency control (matches UI behavior) + const updateResponse = await fetch(patternUrl, { + method: 'PUT', + headers, + body: JSON.stringify({ + attributes: { + title: patternData.attributes.title, + timeFieldName: patternData.attributes.timeFieldName, + fields: JSON.stringify(freshFields), + }, + version: patternData.version, + }), + }); + + if (updateResponse.ok) { + console.debug(`[OpenSearchIndexer] Index pattern fields refreshed (${freshFields.length} fields)`); + } else { + console.warn(`[OpenSearchIndexer] Failed to update index pattern: ${updateResponse.status}`); + } + } catch (error) { + // Non-critical failure - log but don't throw + console.warn('[OpenSearchIndexer] Failed to refresh index pattern:', error); + } + } + private buildIndexName(orgId: string, indexSuffix?: string): string { const date = new Date(); const year = date.getFullYear(); From 5d92c8d3089e2e53013a477bea31a2c1a0eb8222 Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Thu, 29 Jan 2026 14:38:50 -0500 Subject: [PATCH 08/14] fix: tighten analytics validation and indexing Signed-off-by: Aseem Shrey --- backend/.env.example | 2 + backend/scripts/setup-opensearch.ts | 46 +++++++++---------- backend/src/analytics/analytics.controller.ts | 30 ++++++++++++ .../src/analytics/dto/analytics-query.dto.ts | 4 ++ .../analytics/dto/analytics-settings.dto.ts | 7 ++- .../analytics/security-analytics.service.ts | 4 ++ backend/src/auth/session.utils.ts | 30 +++++++++--- docs/analytics.md | 18 ++++++++ docs/components/core.mdx | 2 +- docs/development/workflow-analytics.mdx | 22 ++++++++- worker/src/components/core/analytics-sink.ts 
| 19 ++++++-- 11 files changed, 146 insertions(+), 38 deletions(-) diff --git a/backend/.env.example b/backend/.env.example index 6e4a2a47..62a2f496 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -32,6 +32,8 @@ AUTH_PROVIDER="local" # If AUTH_LOCAL_ALLOW_UNAUTHENTICATED=false, clients must present AUTH_LOCAL_API_KEY in the Authorization header. AUTH_LOCAL_ALLOW_UNAUTHENTICATED="true" AUTH_LOCAL_API_KEY="" +# Required in production for session auth cookie signing +SESSION_SECRET="" # Clerk provider options # Required when AUTH_PROVIDER="clerk" diff --git a/backend/scripts/setup-opensearch.ts b/backend/scripts/setup-opensearch.ts index 7ce319fb..bb4646e0 100644 --- a/backend/scripts/setup-opensearch.ts +++ b/backend/scripts/setup-opensearch.ts @@ -44,30 +44,25 @@ async function main() { }, mappings: { properties: { - '@timestamp': { - type: 'date', - }, - workflow_id: { - type: 'keyword', - }, - workflow_name: { - type: 'keyword', - }, - run_id: { - type: 'keyword', - }, - node_ref: { - type: 'keyword', - }, - component_id: { - type: 'keyword', - }, - asset_key: { - type: 'keyword', - }, - data: { + '@timestamp': { type: 'date' }, + // Root-level analytics fields + scanner: { type: 'keyword' }, + severity: { type: 'keyword' }, + finding_hash: { type: 'keyword' }, + asset_key: { type: 'keyword' }, + // Workflow context under shipsec namespace + shipsec: { type: 'object', dynamic: true, + properties: { + organization_id: { type: 'keyword' }, + run_id: { type: 'keyword' }, + workflow_id: { type: 'keyword' }, + workflow_name: { type: 'keyword' }, + component_id: { type: 'keyword' }, + node_ref: { type: 'keyword' }, + asset_key: { type: 'keyword' }, + }, }, }, }, @@ -79,9 +74,10 @@ async function main() { console.log('\n📊 Template configuration:'); console.log(' - Index pattern: security-findings-*'); console.log(' - Shards: 1, Replicas: 1'); - console.log(' - Mappings: @timestamp (date), workflow_id (keyword), workflow_name (keyword),'); - console.log(' run_id (keyword), node_ref (keyword), component_id (keyword),'); - console.log(' asset_key (keyword), data (object with dynamic: true)'); + console.log(' - Mappings: @timestamp (date)'); + console.log(' root: scanner, severity, finding_hash, asset_key (keyword)'); + console.log(' shipsec.*: organization_id, run_id, workflow_id, workflow_name,'); + console.log(' component_id, node_ref, asset_key (keyword)'); console.log('\n🎉 OpenSearch setup completed successfully!'); } catch (error) { console.error('❌ OpenSearch setup failed'); diff --git a/backend/src/analytics/analytics.controller.ts b/backend/src/analytics/analytics.controller.ts index 88275961..781c20a4 100644 --- a/backend/src/analytics/analytics.controller.ts +++ b/backend/src/analytics/analytics.controller.ts @@ -22,6 +22,13 @@ import { import { CurrentAuth } from '../auth/auth-context.decorator'; import type { AuthContext } from '../auth/types'; +const MAX_QUERY_SIZE = 1000; +const MAX_QUERY_FROM = 10000; + +function isValidNonNegativeInt(value: unknown): value is number { + return typeof value === 'number' && Number.isInteger(value) && value >= 0; +} + @ApiTags('analytics') @Controller('analytics') export class AnalyticsController { @@ -73,6 +80,22 @@ export class AnalyticsController { const size = queryDto.size ?? 10; const from = queryDto.from ?? 
0; + if (!isValidNonNegativeInt(size)) { + throw new BadRequestException('Invalid size: must be a non-negative integer'); + } + + if (!isValidNonNegativeInt(from)) { + throw new BadRequestException('Invalid from: must be a non-negative integer'); + } + + if (size > MAX_QUERY_SIZE) { + throw new BadRequestException(`Invalid size: maximum is ${MAX_QUERY_SIZE}`); + } + + if (from > MAX_QUERY_FROM) { + throw new BadRequestException(`Invalid from: maximum is ${MAX_QUERY_FROM}`); + } + // Call the service to execute the query return this.securityAnalyticsService.query(auth.organizationId, { query: queryDto.query, @@ -154,6 +177,13 @@ export class AnalyticsController { // Validate retention period is within tier limits if (updateDto.analyticsRetentionDays !== undefined) { + if ( + typeof updateDto.analyticsRetentionDays !== 'number' || + !Number.isInteger(updateDto.analyticsRetentionDays) + ) { + throw new BadRequestException('Retention period must be an integer number of days'); + } + const isValid = this.organizationSettingsService.validateRetentionPeriod( tierToValidate, updateDto.analyticsRetentionDays, diff --git a/backend/src/analytics/dto/analytics-query.dto.ts b/backend/src/analytics/dto/analytics-query.dto.ts index dbe54053..969939bd 100644 --- a/backend/src/analytics/dto/analytics-query.dto.ts +++ b/backend/src/analytics/dto/analytics-query.dto.ts @@ -12,6 +12,8 @@ export class AnalyticsQueryRequestDto { description: 'Number of results to return', example: 10, default: 10, + minimum: 0, + maximum: 1000, required: false, }) size?: number; @@ -20,6 +22,8 @@ export class AnalyticsQueryRequestDto { description: 'Offset for pagination', example: 0, default: 0, + minimum: 0, + maximum: 10000, required: false, }) from?: number; diff --git a/backend/src/analytics/dto/analytics-settings.dto.ts b/backend/src/analytics/dto/analytics-settings.dto.ts index 824b302c..ce34c4d3 100644 --- a/backend/src/analytics/dto/analytics-settings.dto.ts +++ b/backend/src/analytics/dto/analytics-settings.dto.ts @@ -1,5 +1,5 @@ import { ApiProperty } from '@nestjs/swagger'; -import { IsEnum, IsInt, Min, Max } from 'class-validator'; +import { IsEnum, IsInt, Min, Max, IsOptional } from 'class-validator'; import type { SubscriptionTier } from '../../database/schema/organization-settings'; export type { SubscriptionTier }; @@ -55,11 +55,13 @@ export class UpdateAnalyticsSettingsDto { example: 30, minimum: 1, maximum: 365, + required: false, }) + @IsOptional() @IsInt() @Min(1) @Max(365) - analyticsRetentionDays!: number; + analyticsRetentionDays?: number; // Optional: allow updating subscription tier (if needed in the future) @ApiProperty({ @@ -67,6 +69,7 @@ export class UpdateAnalyticsSettingsDto { enum: ['free', 'pro', 'enterprise'], required: false, }) + @IsOptional() @IsEnum(['free', 'pro', 'enterprise']) subscriptionTier?: SubscriptionTier; } diff --git a/backend/src/analytics/security-analytics.service.ts b/backend/src/analytics/security-analytics.service.ts index c31b6a42..ce53a645 100644 --- a/backend/src/analytics/security-analytics.service.ts +++ b/backend/src/analytics/security-analytics.service.ts @@ -221,6 +221,10 @@ export class SecurityAnalyticsService { return String(document[explicitField]); } + if (document.asset_key) { + return String(document.asset_key); + } + // Auto-detect from common fields const assetFields = ['host', 'domain', 'subdomain', 'url', 'ip', 'asset', 'target']; diff --git a/backend/src/auth/session.utils.ts b/backend/src/auth/session.utils.ts index 8e3864fd..d7b54319 100644 --- 
a/backend/src/auth/session.utils.ts +++ b/backend/src/auth/session.utils.ts @@ -4,8 +4,16 @@ import * as crypto from 'crypto'; export const SESSION_COOKIE_NAME = 'shipsec_session'; export const SESSION_COOKIE_MAX_AGE = 7 * 24 * 60 * 60 * 1000; // 7 days -// Secret for signing session tokens (use env var in production) -const SESSION_SECRET = process.env.SESSION_SECRET || 'local-dev-session-secret'; +function getSessionSecret(): string { + const secret = process.env.SESSION_SECRET; + if (!secret) { + if (process.env.NODE_ENV === 'production') { + throw new Error('SESSION_SECRET is required in production for session authentication'); + } + return 'local-dev-session-secret'; + } + return secret; +} export interface SessionPayload { username: string; @@ -16,8 +24,9 @@ export interface SessionPayload { * Create a signed session token for local auth. */ export function createSessionToken(username: string): string { + const secret = getSessionSecret(); const payload = JSON.stringify({ username, ts: Date.now() }); - const hmac = crypto.createHmac('sha256', SESSION_SECRET); + const hmac = crypto.createHmac('sha256', secret); hmac.update(payload); const signature = hmac.digest('hex'); return Buffer.from(`${payload}.${signature}`).toString('base64'); @@ -28,6 +37,7 @@ export function createSessionToken(username: string): string { */ export function verifySessionToken(token: string): SessionPayload | null { try { + const secret = getSessionSecret(); const decoded = Buffer.from(token, 'base64').toString('utf-8'); const lastDot = decoded.lastIndexOf('.'); if (lastDot === -1) return null; @@ -35,13 +45,21 @@ export function verifySessionToken(token: string): SessionPayload | null { const payload = decoded.slice(0, lastDot); const signature = decoded.slice(lastDot + 1); - const hmac = crypto.createHmac('sha256', SESSION_SECRET); + const hmac = crypto.createHmac('sha256', secret); hmac.update(payload); const expectedSignature = hmac.digest('hex'); - if (signature !== expectedSignature) return null; + if (signature.length !== expectedSignature.length) return null; + const signatureMatch = crypto.timingSafeEqual( + Buffer.from(signature), + Buffer.from(expectedSignature), + ); + if (!signatureMatch) return null; - return JSON.parse(payload); + const parsed = JSON.parse(payload) as SessionPayload; + if (typeof parsed.ts !== 'number') return null; + if (Date.now() - parsed.ts > SESSION_COOKIE_MAX_AGE) return null; + return parsed; } catch { return null; } diff --git a/docs/analytics.md b/docs/analytics.md index b432f4be..1006e50e 100644 --- a/docs/analytics.md +++ b/docs/analytics.md @@ -140,6 +140,24 @@ VITE_OPENSEARCH_DASHBOARDS_URL=/analytics 3. **Indexing**: Documents bulk-indexed to OpenSearch via `OPENSEARCH_URL` 4. **Visualization**: Users explore data in OpenSearch Dashboards at `/analytics` +## Analytics API Limits + +To protect OpenSearch and keep queries responsive: + +- `size` must be a non-negative integer and is capped at **1000** +- `from` must be a non-negative integer and is capped at **10000** + +Requests exceeding these limits return `400 Bad Request`. + +## Analytics Settings Updates + +The analytics settings update API supports **partial updates**: + +- `analyticsRetentionDays` is optional +- `subscriptionTier` is optional + +Omit fields you don’t want to change. The backend validates the retention days only when provided. 
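+
+As a minimal sketch (the exact route and authentication depend on your deployment; only the field names and limits below come from the DTOs and controller checks above), request payloads look like this:
+
+```ts
+// Partial settings update: send only the fields you want to change.
+const settingsUpdate = {
+  analyticsRetentionDays: 30, // optional, integer between 1 and 365
+};
+
+// Analytics query request: keep size/from within the caps above.
+const queryRequest = {
+  query: { match_all: {} }, // assumed: any OpenSearch query DSL body
+  size: 100, // non-negative integer, max 1000
+  from: 0, // non-negative integer, max 10000
+};
+```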
+ ## Troubleshooting ### Analytics Sink Not Writing Data diff --git a/docs/components/core.mdx b/docs/components/core.mdx index 626698a8..3ad0c765 100644 --- a/docs/components/core.mdx +++ b/docs/components/core.mdx @@ -226,7 +226,7 @@ Indexes workflow output data into OpenSearch for analytics dashboards, queries, | Parameter | Type | Description | |-----------|------|-------------| -| `indexSuffix` | String | Custom suffix for the index name. Defaults to workflow slug. | +| `indexSuffix` | String | Custom suffix for the index name. Defaults to slugified workflow name. | | `assetKeyField` | Select | Field to use as asset identifier. Options: auto, asset_key, host, domain, subdomain, url, ip, asset, target, custom | | `customAssetKeyField` | String | Custom field name when assetKeyField is "custom" | | `failOnError` | Boolean | When enabled, workflow stops if indexing fails. Default: false (fire-and-forget) | diff --git a/docs/development/workflow-analytics.mdx b/docs/development/workflow-analytics.mdx index 2014e12b..52b612e5 100644 --- a/docs/development/workflow-analytics.mdx +++ b/docs/development/workflow-analytics.mdx @@ -217,6 +217,26 @@ After indexing data, create an index pattern in OpenSearch Dashboards: --- +## Analytics API Limits + +The analytics query API enforces sane bounds to protect OpenSearch: + +- `size` must be a non-negative integer and is capped at **1000** +- `from` must be a non-negative integer and is capped at **10000** + +Requests above these limits return `400 Bad Request`. + +## Analytics Settings Updates + +The analytics settings API supports partial updates: + +- `analyticsRetentionDays` is optional +- `subscriptionTier` is optional + +Omit fields you don’t want to change. The backend validates retention days only when provided. + +--- + ## Using Analytics Sink ### Basic Workflow @@ -230,7 +250,7 @@ After indexing data, create an index pattern in OpenSearch Dashboards: | Parameter | Description | |-----------|-------------| -| **Index Suffix** | Custom suffix for the index name. Defaults to workflow slug. | +| **Index Suffix** | Custom suffix for the index name. Defaults to slugified workflow name. | | **Asset Key Field** | Field to use as asset identifier. Auto-detect checks: asset_key > host > domain > subdomain > url > ip > asset > target | | **Custom Field Name** | Custom field when Asset Key Field is "custom" | | **Fail on Error** | When enabled, workflow stops if indexing fails. Default: fire-and-forget. | diff --git a/worker/src/components/core/analytics-sink.ts b/worker/src/components/core/analytics-sink.ts index 9a456d8d..e4048845 100644 --- a/worker/src/components/core/analytics-sink.ts +++ b/worker/src/components/core/analytics-sink.ts @@ -24,6 +24,16 @@ const dataInputDefinitionSchema = z.object({ type DataInputDefinition = z.infer; +function toWorkflowSlug(value?: string | null): string | undefined { + if (!value) return undefined; + const slug = value + .toLowerCase() + .trim() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); + return slug.length > 0 ? slug : undefined; +} + // Base input schema - will be extended by resolvePorts const baseInputSchema = inputs({}); @@ -61,14 +71,14 @@ const parameterSchema = parameters({ .string() .optional() .describe( - 'Optional suffix to append to the index name. Defaults to workflow slug if not provided.', + 'Optional suffix to append to the index name. 
Defaults to slugified workflow name if not provided.', ), { label: 'Index Suffix', editor: 'text', placeholder: 'workflow-slug (default)', description: - 'Custom suffix for the index name (e.g., "subdomain-enum"). Defaults to workflow slug if not provided.', + 'Custom suffix for the index name (e.g., "subdomain-enum"). Defaults to slugified workflow name if not provided.', }, ), assetKeyField: param( @@ -317,6 +327,9 @@ const definition = defineComponent({ assetKeyField = params.assetKeyField; } + const fallbackIndexSuffix = + params.indexSuffix ?? toWorkflowSlug(context.workflowName ?? undefined); + const indexOptions = { workflowId: context.workflowId, workflowName: context.workflowName, @@ -324,7 +337,7 @@ const definition = defineComponent({ nodeRef: context.componentRef, componentId: 'core.analytics.sink', assetKeyField, - indexSuffix: params.indexSuffix, + indexSuffix: fallbackIndexSuffix, trace: context.trace, }; From 014757ad0192a7e7a765eeade4d0bef160577693 Mon Sep 17 00:00:00 2001 From: Aseem Shrey Date: Thu, 29 Jan 2026 14:47:33 -0500 Subject: [PATCH 09/14] style: fix prettier formatting in TopBar and AnalyticsInputsEditor Signed-off-by: Aseem Shrey --- frontend/src/components/layout/TopBar.tsx | 70 +++++++++---------- .../workflow/AnalyticsInputsEditor.tsx | 3 +- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/frontend/src/components/layout/TopBar.tsx b/frontend/src/components/layout/TopBar.tsx index 7b788bf9..96b9231a 100644 --- a/frontend/src/components/layout/TopBar.tsx +++ b/frontend/src/components/layout/TopBar.tsx @@ -461,41 +461,41 @@ export function TopBar({ {env.VITE_OPENSEARCH_DASHBOARDS_URL && workflowId && (!selectedRunId || (selectedRunStatus && selectedRunStatus !== 'RUNNING')) && ( - - )} + + )}