Skip to content

Conversation

KennethEnevoldsen
Copy link
Contributor

A minor cleanup of v2

To detect unused modules (files that are never imported anywhere), I used the following script:

from __future__ import annotations

import ast
from pathlib import Path


def find_never_imported_files(package_dir):
    """Find Python files that are never imported by other files."""
    package_path = Path(package_dir)
    python_files = list(package_path.rglob("*.py"))
    
    # Track which modules are imported
    imported_modules = set()
    
    for file_path in python_files:
        try:
            with open(file_path, encoding='utf-8') as f:
                tree = ast.parse(f.read())
        except (SyntaxError, UnicodeDecodeError):
            continue
            
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imported_modules.add(alias.name)
                    
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    imported_modules.add(node.module)
                    # Also add individual imports
                    for alias in node.names:
                        if node.module:
                            imported_modules.add(f"{node.module}.{alias.name}")
    
    # Find files that are never imported
    never_imported = []
    
    for file_path in python_files:
        # Skip __init__.py files as they're special
        if file_path.name == "__init__.py":
            continue
            
        # Convert file path to potential module names
        relative_path = file_path.relative_to(package_path)
        module_path_parts = relative_path.with_suffix('').parts
        
        # Generate possible import names for this file
        possible_names = set()
        
        # Direct module name
        if len(module_path_parts) == 1:
            possible_names.add(module_path_parts[0])
        else:
            # Package.module format
            possible_names.add('.'.join(module_path_parts))
            # Just the module name
            possible_names.add(module_path_parts[-1])
        
        # Check if any possible name was imported
        if not any(name in imported_modules for name in possible_names):
            # Double-check by looking for any partial matches
            file_stem = file_path.stem
            if not any(file_stem in imp for imp in imported_modules):
                never_imported.append(file_path)
    
    return never_imported

# Usage: scan the "mteb" package directory (relative to the current working
# directory) and list every module that no other scanned file imports.
unused_files = find_never_imported_files("mteb")

print("Files that are never imported:")
for file in unused_files:
    # Two-space indent keeps the entries visually nested under the heading.
    print("  " + str(file))

@KennethEnevoldsen KennethEnevoldsen requested a review from Samoed July 31, 2025 09:14
@KennethEnevoldsen
Copy link
Contributor Author

While trying to fix the failing tests, I found a few (potential) bugs:

@Samoed
Copy link
Member

Samoed commented Aug 2, 2025

Interesting. I will try to solve it tomorrow

@KennethEnevoldsen KennethEnevoldsen merged commit 8f524b3 into v2.0.0 Aug 3, 2025
9 checks passed
@KennethEnevoldsen KennethEnevoldsen deleted the clean-v2 branch August 3, 2025 14:22
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants