diff --git a/src/codegen/git/utils/language.py b/src/codegen/git/utils/language.py index 551ac4212..9f3b2583f 100644 --- a/src/codegen/git/utils/language.py +++ b/src/codegen/git/utils/language.py @@ -133,7 +133,11 @@ def _determine_language_by_git_file_count(folder_path: str) -> ProgrammingLangua repo_operator = RepoOperator(repo_config=repo_config) # Walk through the directory - for rel_path, _ in repo_operator.iter_files(subdirs=[base_path] if base_path else None, ignore_list=GLOBAL_FILE_IGNORE_LIST): + for rel_path, _ in repo_operator.iter_files( + subdirs=[base_path] if base_path else None, + ignore_list=GLOBAL_FILE_IGNORE_LIST, + skip_content=True, + ): # Convert to Path object file_path = Path(git_root) / Path(rel_path) diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index 7f1fdc29e..feabf6627 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -225,7 +225,12 @@ def build_graph(self, repo_operator: RepoOperator) -> None: # =====[ Add all files to the graph in parallel ]===== syncs = defaultdict(lambda: []) if not self.config.disable_file_parse: - for filepath, _ in repo_operator.iter_files(subdirs=self.projects[0].subdirectories, extensions=self.extensions, ignore_list=GLOBAL_FILE_IGNORE_LIST): + for filepath, _ in repo_operator.iter_files( + subdirs=self.projects[0].subdirectories, + extensions=self.extensions, + ignore_list=GLOBAL_FILE_IGNORE_LIST, + skip_content=True, + ): syncs[SyncType.ADD].append(self.to_absolute(filepath)) logger.info(f"> Parsing {len(syncs[SyncType.ADD])} files in {self.projects[0].subdirectories or 'ALL'} subdirectories with {self.extensions} extensions") self._process_diff_files(syncs, incremental=False) diff --git a/src/codegen/sdk/codebase/validation.py b/src/codegen/sdk/codebase/validation.py index 54d04228a..807d7f3a5 100644 --- a/src/codegen/sdk/codebase/validation.py +++ b/src/codegen/sdk/codebase/validation.py @@ -37,7 +37,16 @@ def post_init_validation(codebase: CodebaseType) -> PostInitValidationStatus: return PostInitValidationStatus.NO_NODES # Verify the graph has the same number of files as there are in the repo - if len(codebase.files) != len(list(codebase.op.iter_files(codebase.ctx.projects[0].subdirectories, extensions=codebase.ctx.extensions, ignore_list=GLOBAL_FILE_IGNORE_LIST))): + if len(codebase.files) != len( + list( + codebase.op.iter_files( + codebase.ctx.projects[0].subdirectories, + extensions=codebase.ctx.extensions, + ignore_list=GLOBAL_FILE_IGNORE_LIST, + skip_content=True, + ) + ) + ): return PostInitValidationStatus.MISSING_FILES # Verify import resolution diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py index 735a8b5ea..0b55b04de 100644 --- a/src/codegen/sdk/core/codebase.py +++ b/src/codegen/sdk/core/codebase.py @@ -309,6 +309,7 @@ def files(self, *, extensions: list[str] | Literal["*"] | None = None) -> list[T for filepath, _ in self._op.iter_files( extensions=None if extensions == "*" else extensions, ignore_list=GLOBAL_FILE_IGNORE_LIST, + skip_content=True, ): files.append(self.get_file(filepath, optional=False)) # Sort files alphabetically