Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 31 additions & 103 deletions src/lib/build_cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ import {z} from 'zod';
import type {Logger} from '@fuzdev/fuz_util/log.js';
import {git_current_commit_hash} from '@fuzdev/fuz_util/git.js';
import {fs_exists} from '@fuzdev/fuz_util/fs.js';
import {map_concurrent} from '@fuzdev/fuz_util/async.js';
import {hash_secure} from '@fuzdev/fuz_util/hash.js';
import {
collect_file_snapshot,
validate_file_snapshot,
type FileSnapshotEntry,
} from '@fuzdev/fuz_util/file_snapshot.js';

import type {GroConfig} from './gro_config.ts';
import {paths} from './paths.ts';
Expand Down Expand Up @@ -148,42 +151,14 @@ export const save_build_cache_metadata = async (
* Validates that a cached build is still valid by checking stats and hashing outputs.
* Uses size as a fast negative check before expensive hashing.
* This is comprehensive validation to catch manual tampering or corruption.
*
* Delegates to `validate_file_snapshot` from `@fuzdev/fuz_util`.
*/
export const validate_build_cache = async (metadata: BuildCacheMetadata): Promise<boolean> => {
	// Validation of the recorded outputs is handled entirely by `validate_file_snapshot`.
	// A second hand-rolled implementation placed ahead of this call would return first
	// and leave the delegation unreachable, so this is the only code path.
	// NOTE(review): confirm `validate_file_snapshot` treats missing or unreadable files
	// as invalid (returns false) rather than throwing.
	return validate_file_snapshot({
		entries: metadata.outputs,
		concurrency: 20,
	});
};

/**
Expand Down Expand Up @@ -231,83 +206,36 @@ export const is_build_cache_valid = async (
return true;
};

/**
 * Converts a `FileSnapshotEntry` into the `BuildOutputEntry` shape used by the build cache.
 *
 * The `hash`, `size`, `mtime`, `ctime`, and `mode` fields are non-null asserted,
 * so this expects entries that were collected with all of those fields enabled.
 */
const to_build_output_entry = (entry: FileSnapshotEntry): BuildOutputEntry => {
	const {path, hash, size, mtime, ctime, mode} = entry;
	return {path, hash: hash!, size: size!, mtime: mtime!, ctime: ctime!, mode: mode!};
};

/**
 * Collects information about all files in build output directories.
 * Returns an array of entries with path, hash, size, mtime, ctime, and mode.
 *
 * Delegates to `collect_file_snapshot` from `@fuzdev/fuz_util`.
 * The build cache metadata file itself is excluded from the results.
 *
 * @param build_dirs Array of output directories to scan (e.g., ['build', 'dist', 'dist_server'])
 * @returns one `BuildOutputEntry` per snapshot entry returned by `collect_file_snapshot`
 */
export const collect_build_outputs = async (
	build_dirs: Array<string>,
): Promise<Array<BuildOutputEntry>> => {
	// NOTE(review): confirm `collect_file_snapshot` skips non-existent directories and
	// ignores symlinks; callers may pass directories that have not been built yet.
	const entries = await collect_file_snapshot({
		dirs: build_dirs,
		// Every field is requested because `to_build_output_entry` non-null asserts all of them.
		fields: {hash: true, size: true, mtime: true, ctime: true, mode: true},
		// Skip only the cache metadata file itself. The final path segment is compared
		// exactly, because a suffix check would also drop unrelated outputs whose names
		// merely end with the metadata filename (e.g. `prebuild.json` vs `build.json`).
		filter: (path) => {
			const name_start = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\')) + 1;
			return path.slice(name_start) !== BUILD_CACHE_METADATA_FILENAME;
		},
		concurrency: 20,
	});
	return entries.map(to_build_output_entry);
};

/**
Expand Down
28 changes: 14 additions & 14 deletions src/routes/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -281,34 +281,34 @@
{
"name": "BUILD_CACHE_METADATA_FILENAME",
"kind": "variable",
"source_line": 15,
"source_line": 18,
"type_signature": "\"build.json\""
},
{
"name": "BUILD_CACHE_VERSION",
"kind": "variable",
"source_line": 16,
"source_line": 19,
"type_signature": "\"1\""
},
{
"name": "BuildOutputEntry",
"kind": "type",
"doc_comment": "Metadata about a single build output file.\nIncludes cryptographic hash for validation plus filesystem stats for debugging and optimization.",
"source_line": 22,
"source_line": 25,
"type_signature": "ZodObject<{ path: ZodString; hash: ZodString; size: ZodNumber; mtime: ZodNumber; ctime: ZodNumber; mode: ZodNumber; }, $strict>"
},
{
"name": "BuildCacheMetadata",
"kind": "type",
"doc_comment": "Metadata stored in .gro/ directory to track build cache validity.\nSchema validates structure at load time to catch corrupted cache files.",
"source_line": 40,
"source_line": 43,
"type_signature": "ZodObject<{ version: ZodString; git_commit: ZodNullable<ZodString>; build_cache_config_hash: ZodString; timestamp: ZodString; outputs: ZodArray<...>; }, $strict>"
},
{
"name": "compute_build_cache_key",
"kind": "function",
"doc_comment": "Computes the cache key components for a build.\nThis determines whether a cached build can be reused.",
"source_line": 61,
"source_line": 64,
"type_signature": "(config: GroConfig, log: Logger, git_commit?: string | null | undefined): Promise<{ git_commit: string | null; build_cache_config_hash: string; }>",
"return_type": "Promise<{ git_commit: string | null; build_cache_config_hash: string; }>",
"parameters": [
Expand All @@ -334,7 +334,7 @@
"name": "load_build_cache_metadata",
"kind": "function",
"doc_comment": "Loads build cache metadata from .gro/ directory.\nInvalid or corrupted cache files are automatically deleted.",
"source_line": 86,
"source_line": 89,
"type_signature": "(): Promise<{ version: string; git_commit: string | null; build_cache_config_hash: string; timestamp: string; outputs: { path: string; hash: string; size: number; mtime: number; ctime: number; mode: number; }[]; } | null>",
"return_type": "Promise<{ version: string; git_commit: string | null; build_cache_config_hash: string; timestamp: string; outputs: { path: string; hash: string; size: number; mtime: number; ctime: number; mode: number; }[]; } | null>",
"parameters": []
Expand All @@ -343,7 +343,7 @@
"name": "save_build_cache_metadata",
"kind": "function",
"doc_comment": "Saves build cache metadata to .gro/ directory.\nErrors are logged but don't fail the build (cache is optional).",
"source_line": 128,
"source_line": 131,
"type_signature": "(metadata: { version: string; git_commit: string | null; build_cache_config_hash: string; timestamp: string; outputs: { path: string; hash: string; size: number; mtime: number; ctime: number; mode: number; }[]; }, log?: Logger | undefined): Promise<...>",
"return_type": "Promise<void>",
"parameters": [
Expand All @@ -361,8 +361,8 @@
{
"name": "validate_build_cache",
"kind": "function",
"doc_comment": "Validates that a cached build is still valid by checking stats and hashing outputs.\nUses size as a fast negative check before expensive hashing.\nThis is comprehensive validation to catch manual tampering or corruption.",
"source_line": 152,
"doc_comment": "Validates that a cached build is still valid by checking stats and hashing outputs.\nUses size as a fast negative check before expensive hashing.\nThis is comprehensive validation to catch manual tampering or corruption.\n\nDelegates to `validate_file_snapshot` from `@fuzdev/fuz_util`.",
"source_line": 157,
"type_signature": "(metadata: { version: string; git_commit: string | null; build_cache_config_hash: string; timestamp: string; outputs: { path: string; hash: string; size: number; mtime: number; ctime: number; mode: number; }[]; }): Promise<...>",
"return_type": "Promise<boolean>",
"parameters": [
Expand All @@ -376,7 +376,7 @@
"name": "is_build_cache_valid",
"kind": "function",
"doc_comment": "Main function to check if the build cache is valid.\nReturns true if the cached build can be used, false if a fresh build is needed.",
"source_line": 197,
"source_line": 172,
"type_signature": "(config: GroConfig, log: Logger, git_commit?: string | null | undefined): Promise<boolean>",
"return_type": "Promise<boolean>",
"parameters": [
Expand All @@ -401,8 +401,8 @@
{
"name": "collect_build_outputs",
"kind": "function",
"doc_comment": "Collects information about all files in build output directories.\nReturns an array of entries with path, hash, size, mtime, ctime, and mode.\n\nFiles are hashed in parallel for performance. For very large builds (10k+ files),\nthis may take several seconds but ensures complete cache validation.",
"source_line": 243,
"doc_comment": "Collects information about all files in build output directories.\nReturns an array of entries with path, hash, size, mtime, ctime, and mode.\n\nDelegates to `collect_file_snapshot` from `@fuzdev/fuz_util`.",
"source_line": 229,
"type_signature": "(build_dirs: string[]): Promise<{ path: string; hash: string; size: number; mtime: number; ctime: number; mode: number; }[]>",
"return_type": "Promise<{ path: string; hash: string; size: number; mtime: number; ctime: number; mode: number; }[]>",
"parameters": [
Expand All @@ -417,7 +417,7 @@
"name": "discover_build_output_dirs",
"kind": "function",
"doc_comment": "Discovers all build output directories in the current working directory.\nReturns an array of directory names that exist: build/, dist/, dist_*",
"source_line": 317,
"source_line": 245,
"type_signature": "(): Promise<string[]>",
"return_type": "Promise<string[]>",
"parameters": []
Expand All @@ -426,7 +426,7 @@
"name": "create_build_cache_metadata",
"kind": "function",
"doc_comment": "Creates build cache metadata after a successful build.\nAutomatically discovers all build output directories (build/, dist/, dist_*).",
"source_line": 362,
"source_line": 290,
"type_signature": "(config: GroConfig, log: Logger, git_commit?: string | null | undefined, build_dirs?: string[] | undefined): Promise<{ version: string; git_commit: string | null; build_cache_config_hash: string; timestamp: string; outputs: { ...; }[]; }>",
"return_type": "Promise<{ version: string; git_commit: string | null; build_cache_config_hash: string; timestamp: string; outputs: { path: string; hash: string; size: number; mtime: number; ctime: number; mode: number; }[]; }>",
"parameters": [
Expand Down
Loading
Loading