-
Notifications
You must be signed in to change notification settings - Fork 8
fix: make exclude patterns recursive to prevent index pollution #76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9eafa5c
31fdda5
cafdb95
83ba23e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,5 +1,6 @@ | ||||||
| import chokidar from 'chokidar'; | ||||||
| import path from 'path'; | ||||||
| import { EXCLUDED_GLOB_PATTERNS } from '../constants/codebase-context.js'; | ||||||
| import { getSupportedExtensions } from '../utils/language-detection.js'; | ||||||
|
|
||||||
| export interface FileWatcherOptions { | ||||||
|
|
@@ -43,18 +44,7 @@ export function startFileWatcher(opts: FileWatcherOptions): () => void { | |||||
| }; | ||||||
|
|
||||||
| const watcher = chokidar.watch(rootPath, { | ||||||
| ignored: [ | ||||||
| '**/node_modules/**', | ||||||
| '**/.codebase-context/**', | ||||||
| '**/.git/**', | ||||||
| '**/dist/**', | ||||||
| '**/.nx/**', | ||||||
| '**/.planning/**', | ||||||
| '**/coverage/**', | ||||||
| '**/.turbo/**', | ||||||
| '**/.next/**', | ||||||
| '**/.cache/**' | ||||||
| ], | ||||||
| ignored: [...EXCLUDED_GLOB_PATTERNS], | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time! |
||||||
| persistent: true, | ||||||
| ignoreInitial: true, | ||||||
| awaitWriteFinish: { stabilityThreshold: 200, pollInterval: 100 } | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| import { afterEach, describe, expect, it } from 'vitest'; | ||
| import { promises as fs } from 'fs'; | ||
| import os from 'os'; | ||
| import path from 'path'; | ||
| import { CodebaseIndexer } from '../src/core/indexer.js'; | ||
| import { analyzerRegistry } from '../src/core/analyzer-registry.js'; | ||
| import { GenericAnalyzer } from '../src/analyzers/generic/index.js'; | ||
| import { | ||
| CODEBASE_CONTEXT_DIRNAME, | ||
| KEYWORD_INDEX_FILENAME | ||
| } from '../src/constants/codebase-context.js'; | ||
|
|
||
| describe('Indexer exclude patterns — nested directories', () => { | ||
| let tempDir: string; | ||
|
|
||
| afterEach(async () => { | ||
| if (tempDir) await fs.rm(tempDir, { recursive: true, force: true }); | ||
| }); | ||
|
|
||
| it('excludes nested coverage, worktrees, .claude, and dist directories', async () => { | ||
| analyzerRegistry.register(new GenericAnalyzer()); | ||
| tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'indexer-exclude-patterns-')); | ||
|
|
||
| // Legitimate source file | ||
| await fs.mkdir(path.join(tempDir, 'src'), { recursive: true }); | ||
| await fs.writeFile( | ||
| path.join(tempDir, 'src', 'app.ts'), | ||
| 'export function main() { return "hello"; }\n' | ||
| ); | ||
|
|
||
| // Polluters — nested paths that should be excluded | ||
| const polluters = [ | ||
| ['packages', 'ui', 'coverage', 'prettify.js'], | ||
| ['.claude', 'worktrees', 'branch', 'src', 'app.ts'], | ||
| ['worktrees', 'portal30-pr', 'src', 'real.ts'], | ||
| ['apps', 'web', 'dist', 'bundle.js'] | ||
| ]; | ||
|
|
||
| for (const segments of polluters) { | ||
| const dir = path.join(tempDir, ...segments.slice(0, -1)); | ||
| await fs.mkdir(dir, { recursive: true }); | ||
| await fs.writeFile(path.join(tempDir, ...segments), 'export const polluter = true;\n'); | ||
| } | ||
|
|
||
| const indexer = new CodebaseIndexer({ | ||
| rootPath: tempDir, | ||
| config: { | ||
| skipEmbedding: true, | ||
| parsing: { | ||
| maxFileSize: 1048576, | ||
| chunkSize: 50, | ||
| chunkOverlap: 0, | ||
| parseTests: true, | ||
| parseNodeModules: false | ||
| } | ||
| } | ||
| }); | ||
|
|
||
| await indexer.index(); | ||
|
|
||
| const indexPath = path.join(tempDir, CODEBASE_CONTEXT_DIRNAME, KEYWORD_INDEX_FILENAME); | ||
| const indexRaw = JSON.parse(await fs.readFile(indexPath, 'utf-8')) as Record<string, unknown>; | ||
|
|
||
| let chunks: Array<{ filePath: string }>; | ||
| if (Array.isArray(indexRaw)) { | ||
| chunks = indexRaw; | ||
| } else if (Array.isArray(indexRaw?.chunks)) { | ||
| chunks = indexRaw.chunks as Array<{ filePath: string }>; | ||
| } else { | ||
| throw new Error( | ||
| `Unexpected index format: keys=${JSON.stringify(Object.keys(indexRaw ?? {}))}` | ||
| ); | ||
| } | ||
|
|
||
| const indexedPaths = chunks.map((chunk) => chunk.filePath); | ||
|
|
||
| // The legitimate file must be indexed | ||
| expect(indexedPaths.some((p) => p.includes('src/app.ts') || p.includes('src\\app.ts'))).toBe( | ||
| true | ||
| ); | ||
|
|
||
| // None of the polluter paths should appear | ||
| const polluterMarkers = ['coverage', '.claude', 'worktrees', 'dist']; | ||
| for (const marker of polluterMarkers) { | ||
| const leaked = indexedPaths.filter((p) => p.includes(marker)); | ||
| expect(leaked, `paths containing "${marker}" should not be indexed`).toEqual([]); | ||
| } | ||
| }); | ||
| }); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding
'build'to the shared recursive exclude list meansmergeConfig()now passes**/build/**toscanFiles()(src/core/indexer.ts:278,1033-1039) andstartFileWatcher()ignores the same paths (src/core/file-watcher.ts:46-50). In monorepos that keep checked-in tooling underpackages/*/build/or similar, those source files will silently disappear from search and auto-refresh even though the oldbuild/**rule only skipped the repository root’s build output.Useful? React with 👍 / 👎.