# Source code for pyhiv.cli

"""
PyHIV Command Line Interface
"""
import click
from pathlib import Path
import sys
import time
from pyhiv import __version__
import logging

SUPPORTED_FASTA_EXTENSIONS = {'.fasta', '.fa', '.fna', '.ffn'}


def validate_n_jobs(ctx, param, value):
    """Click option callback that rejects ``--n-jobs`` values below 1.

    Args:
        ctx: Click context passed to the callback (not used).
        param: Click parameter being validated (not used).
        value: Parsed option value, or ``None`` when the option was omitted.

    Returns:
        ``value`` unchanged when it is ``None`` or at least 1.

    Raises:
        click.BadParameter: If ``value`` is given and is less than 1.
    """
    # An omitted option is passed through unchanged.
    if value is None:
        return None
    if value < 1:
        raise click.BadParameter('must be at least 1')
    return value
def count_fasta_files(directory):
    """Return how many FASTA files exist anywhere under ``directory``.

    The tree is walked recursively. An entry is counted when
    ``Path.is_file()`` is true for it and its suffix, lower-cased, is a
    member of ``SUPPORTED_FASTA_EXTENSIONS``.

    Args:
        directory: Root directory to search (string or path-like).

    Returns:
        The number of matching files as an ``int``.
    """
    total = 0
    for entry in Path(directory).rglob('*'):
        if not entry.is_file():
            continue
        if entry.suffix.lower() in SUPPORTED_FASTA_EXTENSIONS:
            total += 1
    return total
def _echo_generated_files(output_path, reporting):
    """Print the key output files that exist under ``output_path``.

    Shows the final table, up to three alignment files (with a summary
    line for any remainder) and, when ``reporting`` is true, the PDF report.
    """
    click.echo("\nGenerated files:")
    final_table = output_path / 'final_table.tsv'
    if final_table.exists():
        click.echo(f" • {final_table}")

    # List some alignment files
    alignment_files = list(output_path.glob('best_alignment_*.fasta'))
    for af in alignment_files[:3]:
        click.echo(f" • {af}")
    if len(alignment_files) > 3:
        click.echo(f" • ... and {len(alignment_files) - 3} more alignment file(s)")

    # Show PDF report if generated
    if reporting:
        pdf_report = output_path / 'PyHIV_report_all_sequences.pdf'
        if pdf_report.exists():  # pragma: no cover
            click.echo(f" • {pdf_report}")


def _list_fasta_files(directory):
    """Return the FASTA files under ``directory`` as a sorted list of paths.

    Uses the same rule as ``count_fasta_files`` (a file whose lower-cased
    suffix is in ``SUPPORTED_FASTA_EXTENSIONS``) so that the listing always
    agrees with the reported count, and sorts the result so the output
    order is deterministic.
    """
    return sorted(
        f for f in Path(directory).rglob('*')
        if f.is_file() and f.suffix.lower() in SUPPORTED_FASTA_EXTENSIONS
    )


@click.command()
@click.version_option(version=__version__, prog_name="PyHIV")
@click.argument(
    'fastas_dir',
    type=click.Path(
        exists=True,
        file_okay=False,
        dir_okay=True,
        path_type=Path,
        readable=True,
    ),
    required=True
)
@click.option(
    '--subtyping/--no-subtyping',
    default=True,
    show_default=True,
    help='Enable or disable HIV-1 subtyping. When enabled, aligns with reference genomes for subtype identification.'
)
@click.option(
    '--splitting/--no-splitting',
    default=True,
    show_default=True,
    help='Enable or disable gene region splitting. When enabled, splits sequences into gene regions.'
)
@click.option(
    '-o', '--output-dir',
    type=click.Path(path_type=Path),
    default=None,
    help='Output directory for results. Defaults to "PyHIV_results" in the current directory.'
)
@click.option(
    '-j', '--n-jobs',
    type=int,
    default=None,
    callback=validate_n_jobs,
    help='Number of parallel jobs to run. If not specified, uses all available CPU cores.'
)
@click.option(
    '-v', '--verbose',
    is_flag=True,
    help='Enable verbose output.'
)
@click.option(
    '-q', '--quiet',
    is_flag=True,
    help='Suppress all non-error output.'
)
@click.option(
    '--reporting/--no-reporting',
    default=True,
    show_default=True,
    help='Enable or disable PDF report generation. When enabled, generates a PDF report with sequence visualizations.'
)
def main(fastas_dir, subtyping, splitting, output_dir, n_jobs, verbose, quiet, reporting):
    """
    PyHIV: HIV-1 sequence alignment, subtyping, and gene region splitting tool.

    FASTAS_DIR: Directory containing input FASTA files to process.

    \b
    Examples:
        # Basic usage with default settings
        pyhiv /path/to/fastas/

    \b
        # Disable subtyping
        pyhiv /path/to/fastas/ --no-subtyping

    \b
        # Custom output directory with 4 parallel jobs
        pyhiv /path/to/fastas/ -o results/ -j 4

    \b
        # Only alignment, no splitting
        pyhiv /path/to/fastas/ --no-splitting

    \b
        # Quiet mode (only show errors)
        pyhiv /path/to/fastas/ -q
    """
    # NOTE: the docstring above is the --help text shown by Click, so
    # developer-facing notes live in comments instead.
    #
    # Flow: validate flags -> configure logging -> count inputs -> run the
    # PyHIV pipeline -> print a summary.
    # Exit codes: 1 when no FASTA files are found or processing fails,
    # 130 when interrupted with Ctrl-C. --verbose together with --quiet
    # raises click.UsageError.

    # Handle conflicting flags
    if verbose and quiet:
        raise click.UsageError("Cannot use --verbose and --quiet together")

    # Configure logging based on flags
    if quiet:
        logging_level = logging.ERROR
    elif verbose:
        logging_level = logging.DEBUG
    else:
        logging_level = logging.INFO

    logging.basicConfig(
        level=logging_level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Set output directory (used here for messages; PyHIV itself receives
    # None when no directory was requested).
    output_path = output_dir or Path('PyHIV_results')

    # Check if output directory exists and warn user
    if output_path.exists() and not quiet:
        click.secho(
            f"Warning: Output directory '{output_path}' already exists. Files may be overwritten.",
            fg='yellow',
            err=True,
        )

    # Count input files
    num_files = count_fasta_files(fastas_dir)
    if num_files == 0:
        click.secho("Error: No FASTA files found in the input directory.", fg='red', err=True)
        sys.exit(1)

    if verbose:
        click.echo(f"PyHIV v{__version__}")
        click.echo(f"Input directory: {fastas_dir}")
        click.echo(f"Found {num_files} FASTA file(s)")
        click.echo(f"Subtyping: {'enabled' if subtyping else 'disabled'}")
        click.echo(f"Splitting: {'enabled' if splitting else 'disabled'}")
        click.echo(f"Output directory: {output_path}")
        click.echo(f"Parallel jobs: {n_jobs or 'auto (all CPUs)'}")
        click.echo()
    elif not quiet:
        click.echo(f"Processing {num_files} FASTA file(s)...")

    start_time = time.time()

    try:
        # Imported here, inside the try, so an import failure is reported
        # through the ImportError handler below.
        from pyhiv import PyHIV

        PyHIV(
            fastas_dir=str(fastas_dir),
            subtyping=subtyping,
            splitting=splitting,
            output_dir=str(output_dir) if output_dir else None,
            n_jobs=n_jobs,
            reporting=reporting
        )

        elapsed_time = time.time() - start_time

        if not quiet:
            click.secho("\n✓ Processing complete!", fg='green', bold=True)
            click.echo(f"Results saved to: {output_path}")
            click.echo(f"Time elapsed: {elapsed_time:.2f}s")

            # Show key output files
            if verbose:
                _echo_generated_files(output_path, reporting)

    except ImportError as e:
        click.secho(f"Error: Could not import PyHIV module: {e}", fg='red', err=True)
        sys.exit(1)
    except KeyboardInterrupt:
        click.secho("\n\nProcessing interrupted by user.", fg='yellow', err=True)
        sys.exit(130)
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero; the
        # full traceback is only shown in verbose mode.
        click.secho(f"Error during processing: {e}", fg='red', err=True)
        if verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


@click.command('validate')
@click.argument(
    'fastas_dir',
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
)
def validate(fastas_dir):
    """Validate FASTA files in the input directory without processing."""
    # Exits with status 1 when the directory contains no FASTA files.
    num_files = count_fasta_files(fastas_dir)

    if num_files == 0:
        click.secho("✗ No FASTA files found.", fg='red')
        sys.exit(1)

    click.secho(f"✓ Found {num_files} FASTA file(s)", fg='green')

    # List files if not too many. The listing uses the same matching rule
    # as the count so the two can never disagree (e.g. on "X.FASTA").
    if num_files <= 10:
        click.echo("\nFiles:")
        for f in _list_fasta_files(fastas_dir):
            click.echo(f" • {f.name}")


# Create a group to allow multiple commands
@click.group()
@click.version_option(version=__version__, prog_name="PyHIV")
def cli():
    """PyHIV: HIV-1 sequence analysis toolkit"""
    pass


cli.add_command(main, name='run')
cli.add_command(validate)


if __name__ == '__main__':  # pragma: no cover
    cli()