#!/usr/bin/env python3
"""
Email Address Validator

Reads a list of email addresses from a file and validates them.
Performs both syntax validation and basic domain checking.
"""

import argparse
import csv
import re
import socket
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple

# RFC 5322 "atext" run: the characters allowed in an unquoted local part,
# deliberately WITHOUT the dot so that dots can only appear as separators.
_ATEXT = r"[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+"
# One DNS label: 1-63 characters, alphanumeric at both ends, hyphens inside.
_LABEL = r"[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"


class EmailValidator:
    """Validate email addresses by syntax and, optionally, by domain lookup."""

    def __init__(self):
        # Simplified RFC 5322 address: dot-atom local part (no leading,
        # trailing or consecutive dots) followed by dot-separated DNS labels.
        # Anchored with \Z rather than $ because $ also matches just before a
        # trailing newline.
        self.email_pattern = re.compile(
            r"^" + _ATEXT + r"(?:\." + _ATEXT + r")*"
            r"@" + _LABEL + r"(?:\." + _LABEL + r")*\Z"
        )
        # Lookup results keyed by lower-cased domain, so a list with many
        # addresses on the same domain triggers only one resolution each.
        self._domain_cache: Dict[str, bool] = {}

    def validate_syntax(self, email: str) -> bool:
        """Return True if *email* is syntactically valid.

        Rejects empty input, addresses longer than 254 characters, local
        parts longer than 64 characters, and anything the address pattern
        does not match in full.
        """
        if not email or len(email) > 254:
            return False

        # Split on the last '@' to separate local part and domain.
        local_part, separator, _domain = email.rpartition('@')
        if not separator:
            return False

        # Check local part length (max 64 characters).
        if len(local_part) > 64:
            return False

        return self.email_pattern.fullmatch(email) is not None

    def validate_domain(self, email: str) -> bool:
        """Return True if the domain of *email* resolves to an address.

        Uses ``socket.getaddrinfo`` only; MX records are not queried, so a
        domain that accepts mail solely through MX records without any
        address record is reported as invalid.
        """
        _local_part, separator, domain = email.rpartition('@')
        if not separator or not domain:
            return False

        cache_key = domain.lower()
        cached = self._domain_cache.get(cache_key)
        if cached is not None:
            return cached

        try:
            socket.getaddrinfo(domain, None)
        except (OSError, UnicodeError):
            # OSError covers socket.gaierror; UnicodeError is raised when the
            # host name cannot be IDNA-encoded.
            resolved = False
        else:
            resolved = True

        self._domain_cache[cache_key] = resolved
        return resolved

    def validate_email(self, email: str, check_domain: bool = True) -> Dict[str, Any]:
        """Run syntax and (optionally) domain validation on one address.

        The address is stripped and lower-cased first. Returns a dict with
        the keys ``email``, ``valid_syntax``, ``valid_domain``,
        ``overall_valid`` and ``errors`` (a list of messages). When
        *check_domain* is False the domain is reported as valid without
        being looked up.
        """
        email = email.strip().lower()
        result = {
            'email': email,
            'valid_syntax': False,
            'valid_domain': False,
            'overall_valid': False,
            'errors': [],
        }

        # Check syntax
        if self.validate_syntax(email):
            result['valid_syntax'] = True
        else:
            result['errors'].append('Invalid email syntax')

        # Check domain only if syntax is valid
        if result['valid_syntax'] and check_domain:
            if self.validate_domain(email):
                result['valid_domain'] = True
            else:
                result['errors'].append('Domain does not exist or is unreachable')
        elif not check_domain:
            result['valid_domain'] = True  # Skip domain check

        # Overall validity
        result['overall_valid'] = result['valid_syntax'] and result['valid_domain']
        return result


def read_emails_from_file(file_path: str) -> List[str]:
    """Read email addresses from a file, one per line.

    Blank lines are skipped and surrounding whitespace is removed. The file
    is decoded as UTF-8; a leading byte-order mark is dropped so it does not
    end up attached to the first address. On any read or decode error a
    message is printed to stderr and the process exits with status 1.
    """
    try:
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            stripped_lines = (line.strip() for line in f)
            return [line for line in stripped_lines if line]
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading file '{file_path}': {e}", file=sys.stderr)
        sys.exit(1)


def print_results(results: List[Dict], verbose: bool = False):
    """Print a summary of validation results to stdout.

    Always prints the totals and the list of invalid addresses with their
    errors. With *verbose* set, a per-address status line is printed as well.
    An empty *results* list is reported with a 0.0% success rate.
    """
    total_count = len(results)
    valid_count = sum(1 for r in results if r['overall_valid'])
    # Guard against division by zero when there is nothing to report.
    success_rate = (valid_count / total_count) * 100 if total_count else 0.0

    print("\nValidation Results:")
    print("=" * 50)
    print(f"Total emails checked: {total_count}")
    print(f"Valid emails: {valid_count}")
    print(f"Invalid emails: {total_count - valid_count}")
    print(f"Success rate: {success_rate:.1f}%")
    print()

    if verbose:
        print("Detailed Results:")
        print("-" * 50)
        for result in results:
            status = "✓ VALID" if result['overall_valid'] else "✗ INVALID"
            print(f"{status:10} | {result['email']}")
            for error in result['errors']:
                # Empty 10-wide field keeps the '|' aligned with the status row.
                print(f"{'':10} | Error: {error}")
        print()

    # Show invalid emails
    invalid_emails = [r for r in results if not r['overall_valid']]
    if invalid_emails:
        print("Invalid Emails:")
        print("-" * 30)
        for result in invalid_emails:
            print(f"• {result['email']}")
            for error in result['errors']:
                print(f"  - {error}")
        print()


def save_results(results: List[Dict], output_file: str):
    """Write validation results to *output_file* as CSV.

    Columns: Email, Valid, Syntax_Valid, Domain_Valid, Errors (messages
    joined with '; '). Rows are written with the ``csv`` module so that
    commas or quotes inside a field are escaped. A write failure is reported
    on stderr and is not re-raised.
    """
    try:
        # newline='' lets the csv writer control line endings itself.
        with open(output_file, 'w', encoding='utf-8', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(
                ['Email', 'Valid', 'Syntax_Valid', 'Domain_Valid', 'Errors']
            )
            for result in results:
                writer.writerow([
                    result['email'],
                    result['overall_valid'],
                    result['valid_syntax'],
                    result['valid_domain'],
                    '; '.join(result['errors']),
                ])
        print(f"Results saved to: {output_file}")
    except OSError as e:
        print(f"Error saving results to '{output_file}': {e}", file=sys.stderr)


def main():
    """Command-line entry point: parse arguments, validate, report, save."""
    parser = argparse.ArgumentParser(
        description='Validate email addresses from a file',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python email_validator.py emails.txt
  python email_validator.py emails.txt --no-domain-check
  python email_validator.py emails.txt --verbose --output results.csv
  python email_validator.py emails.txt -v -o validated_emails.csv
        """
    )
    parser.add_argument('input_file',
                        help='Path to file containing email addresses (one per line)')
    parser.add_argument('--no-domain-check', action='store_true',
                        help='Skip domain validation (faster, syntax-only)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show detailed results for each email')
    parser.add_argument('-o', '--output', help='Save results to CSV file')
    args = parser.parse_args()

    # Check if input file exists
    if not Path(args.input_file).exists():
        print(f"Error: Input file '{args.input_file}' does not exist.",
              file=sys.stderr)
        sys.exit(1)

    # Read emails from file
    print(f"Reading emails from: {args.input_file}")
    emails = read_emails_from_file(args.input_file)
    if not emails:
        print("No email addresses found in the file.")
        sys.exit(1)
    print(f"Found {len(emails)} email addresses to validate.")

    # Validate emails
    validator = EmailValidator()
    results = []
    check_domain = not args.no_domain_check
    if not check_domain:
        print("Note: Domain checking is disabled (syntax validation only)")

    print("\nValidating emails...")
    for i, email in enumerate(emails, 1):
        if args.verbose:
            # Flush so the progress line is visible while the lookup runs.
            print(f"Checking {i}/{len(emails)}: {email}", end="", flush=True)
        result = validator.validate_email(email, check_domain)
        results.append(result)
        if args.verbose:
            print(" ✓" if result['overall_valid'] else " ✗")

    # Print results
    print_results(results, args.verbose)

    # Save results if requested
    if args.output:
        save_results(results, args.output)


if __name__ == "__main__":
    main()