"""
增强的日志分析工具
"""
import re
import json
from typing import List, Dict, Optional
from datetime import datetime, timedelta
from collections import Counter, defaultdict
from loguru import logger
from pathlib import Path


class EnhancedLogAnalyzer:
    """Heuristic analyzer for plain-text log files stored under one directory.

    All detection is regex based (see ``self.patterns``): severity keywords
    are matched case-insensitively anywhere in a line, HTTP requests are
    recognised as ``<word> <path-like token> <3 digits>``, and IP addresses
    as four dot-separated groups of 1-3 digits (octet ranges are not checked).
    """

    def __init__(self, log_dir: str = "logs"):
        """Remember the log directory and compile the patterns used for analysis.

        Args:
            log_dir: Directory that the ``log_file`` arguments of the other
                methods are resolved against.
        """
        self.log_dir = Path(log_dir)
        self.patterns = {
            'error': re.compile(r'ERROR|CRITICAL|FATAL', re.IGNORECASE),
            'warning': re.compile(r'WARNING|WARN', re.IGNORECASE),
            'info': re.compile(r'INFO', re.IGNORECASE),
            'http_request': re.compile(r'(\w+)\s+([/\w]+)\s+(\d{3})'),
            'ip_address': re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b'),
            'timestamp': re.compile(r'\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}'),
        }

    def analyze_logs(self, log_file: str, hours: int = 24) -> Dict:
        """Collect statistics for the entries of the last ``hours`` hours.

        A line carrying a parseable ``YYYY-MM-DD HH:MM:SS`` timestamp decides
        whether the entry is inside the time window. Lines without a
        parseable timestamp (e.g. traceback continuation lines) inherit the
        decision of the closest preceding timestamped line, so the tail of an
        old multi-line entry is not counted as recent activity. Lines that
        appear before the first timestamp are always counted.

        Args:
            log_file: File name relative to ``self.log_dir``.
            hours: Size of the time window, counted back from now.

        Returns:
            An empty dict when ``log_file`` is not an existing regular file.
            Otherwise a dict with the keys ``total_lines`` (every line read,
            regardless of the time window), ``errors``, ``warnings``,
            ``info``, ``http_requests`` (``"METHOD endpoint status"`` ->
            count), ``ip_addresses`` and ``error_messages`` (Counters),
            ``top_errors`` / ``top_ips`` (ten most common, as lists of
            dicts) and ``request_stats`` (``total``, ``by_method``,
            ``by_status``). If reading fails midway the error is logged and
            the partially filled dict is returned.
        """
        log_path = self.log_dir / log_file

        # is_file() also rejects directories, which exists() would let
        # through only to fail later inside open().
        if not log_path.is_file():
            logger.warning(f"日志文件不存在: {log_file}")
            return {}

        start_time = datetime.now() - timedelta(hours=hours)

        stats = {
            'total_lines': 0,
            'errors': 0,
            'warnings': 0,
            'info': 0,
            'http_requests': defaultdict(int),
            'ip_addresses': Counter(),
            'error_messages': Counter(),
            'top_errors': [],
            'top_ips': [],
            'request_stats': {},
        }

        try:
            # Whether the entry currently being read lies inside the window.
            # Starts as True so that leading lines without any timestamp
            # are still analysed.
            in_window = True

            with open(log_path, 'r', encoding='utf-8', errors='ignore') as f:
                for line in f:
                    stats['total_lines'] += 1

                    # Update the window state from the line's timestamp, if any.
                    timestamp_match = self.patterns['timestamp'].search(line)
                    if timestamp_match:
                        try:
                            log_time = datetime.strptime(
                                timestamp_match.group(), '%Y-%m-%d %H:%M:%S'
                            )
                        except ValueError:
                            # Looks like a timestamp but is not a valid date:
                            # treat the line as a continuation line.
                            pass
                        else:
                            in_window = log_time >= start_time

                    if not in_window:
                        continue

                    # Severity: the first matching level wins (error > warning > info).
                    if self.patterns['error'].search(line):
                        stats['errors'] += 1
                        error_msg = self._extract_error_message(line)
                        if error_msg:
                            stats['error_messages'][error_msg] += 1
                    elif self.patterns['warning'].search(line):
                        stats['warnings'] += 1
                    elif self.patterns['info'].search(line):
                        stats['info'] += 1

                    # HTTP request: only the first match in the line is used.
                    http_match = self.patterns['http_request'].search(line)
                    if http_match:
                        method, endpoint, status = http_match.groups()
                        stats['http_requests'][f"{method} {endpoint} {status}"] += 1

                    # Every IP-like token in the line is counted.
                    stats['ip_addresses'].update(
                        self.patterns['ip_address'].findall(line)
                    )

            # Top-10 lists
            stats['top_errors'] = [
                {'message': msg, 'count': count}
                for msg, count in stats['error_messages'].most_common(10)
            ]

            stats['top_ips'] = [
                {'ip': ip, 'count': count}
                for ip, count in stats['ip_addresses'].most_common(10)
            ]

            # Aggregated HTTP request statistics
            stats['request_stats'] = {
                'total': sum(stats['http_requests'].values()),
                'by_method': self._group_by_method(stats['http_requests']),
                'by_status': self._group_by_status(stats['http_requests']),
            }

        except Exception as e:
            # Best effort: report the failure and return what was gathered so far.
            logger.error(f"分析日志失败: {e}")

        return stats

    def _extract_error_message(self, line: str) -> Optional[str]:
        """Return the last ``|``-separated field of ``line``, capped at 100 chars.

        Returns ``None`` when the line has fewer than three ``|``-separated
        fields. The result may be an empty string if the last field is blank.
        """
        parts = line.split('|')
        if len(parts) > 2:
            return parts[-1].strip()[:100]  # cap the length
        return None

    def _group_by_method(self, requests: Dict) -> Dict:
        """Sum request counts per HTTP method (first token of each key)."""
        grouped = defaultdict(int)
        for key, count in requests.items():
            method = key.split()[0]
            grouped[method] += count
        return dict(grouped)

    def _group_by_status(self, requests: Dict) -> Dict:
        """Sum request counts per status code (last token of each key)."""
        grouped = defaultdict(int)
        for key, count in requests.items():
            status = key.split()[-1]
            grouped[status] += count
        return dict(grouped)

    def search_logs(self, query: str, log_file: str, limit: int = 100) -> List[Dict]:
        """Return up to ``limit`` lines containing ``query`` (case-insensitive).

        Args:
            query: Substring to look for.
            log_file: File name relative to ``self.log_dir``.
            limit: Maximum number of hits; zero or a negative value yields
                an empty list.

        Returns:
            A list of dicts with ``line_number`` (1-based), ``content``
            (the stripped line) and ``timestamp`` (string or ``None``), in
            file order. Empty when the file is missing; if reading fails the
            error is logged and the hits found so far are returned.
        """
        # Without this guard the first hit would be appended before the
        # limit is ever checked, so limit=0 would return one result.
        if limit <= 0:
            return []

        log_path = self.log_dir / log_file

        if not log_path.is_file():
            return []

        results = []
        query_lower = query.lower()

        try:
            with open(log_path, 'r', encoding='utf-8', errors='ignore') as f:
                for line_num, line in enumerate(f, 1):
                    if query_lower not in line.lower():
                        continue

                    results.append({
                        'line_number': line_num,
                        'content': line.strip(),
                        'timestamp': self._extract_timestamp(line),
                    })

                    if len(results) >= limit:
                        break
        except Exception as e:
            logger.error(f"搜索日志失败: {e}")

        return results

    def _extract_timestamp(self, line: str) -> Optional[str]:
        """Return the first ``YYYY-MM-DD HH:MM:SS`` substring of ``line``, or ``None``."""
        match = self.patterns['timestamp'].search(line)
        return match.group() if match else None

    def generate_report(self, log_file: str, hours: int = 24) -> str:
        """Render the result of ``analyze_logs`` as a human-readable text report.

        Args:
            log_file: File name relative to ``self.log_dir``.
            hours: Size of the analysed time window in hours.

        Returns:
            The report text. When the analysis returns an empty dict
            (missing file) all counters are shown as 0 and the lists are empty.
        """
        stats = self.analyze_logs(log_file, hours)

        header = f"""
日志分析报告
============
文件: {log_file}
时间范围: 最近 {hours} 小时
生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

统计信息
--------
总行数: {stats.get('total_lines', 0)}
错误: {stats.get('errors', 0)}
警告: {stats.get('warnings', 0)}
信息: {stats.get('info', 0)}

Top 10 错误
-----------
"""
        # Collect the pieces and join once instead of repeated string +=.
        sections = [header]
        sections.extend(
            f"{i}. {error['message'][:80]} ({error['count']}次)\n"
            for i, error in enumerate(stats.get('top_errors', [])[:10], 1)
        )

        sections.append("\nTop 10 IP地址\n------------\n")
        sections.extend(
            f"{i}. {ip_info['ip']} ({ip_info['count']}次)\n"
            for i, ip_info in enumerate(stats.get('top_ips', [])[:10], 1)
        )

        sections.append("\nHTTP请求统计\n------------\n")
        request_stats = stats.get('request_stats', {})
        sections.append(f"总请求数: {request_stats.get('total', 0)}\n")

        if 'by_method' in request_stats:
            sections.append("\n按方法统计:\n")
            sections.extend(
                f"  {method}: {count}\n"
                for method, count in request_stats['by_method'].items()
            )

        if 'by_status' in request_stats:
            sections.append("\n按状态码统计:\n")
            sections.extend(
                f"  {status}: {count}\n"
                for status, count in request_stats['by_status'].items()
            )

        return "".join(sections)

    def export_report(self, log_file: str, output_file: str, hours: int = 24):
        """Generate the report for ``log_file`` and write it to ``output_file``.

        Missing parent directories of ``output_file`` are created. The file
        is written as UTF-8 and overwritten if it already exists.
        """
        report = self.generate_report(log_file, hours)

        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(report)

        logger.info(f"报告已导出: {output_file}")

