"""
Token counting utilities for Imagen4 prompts
"""

import re
import logging
from typing import Optional

logger = logging.getLogger(__name__)

# Baseline constants used for token estimation
AVERAGE_CHARS_PER_TOKEN = 4  # average characters per token for English text
KOREAN_CHARS_PER_TOKEN = 2   # average characters per token for Korean (Hangul) text
MAX_PROMPT_TOKENS = 480      # maximum number of tokens allowed in a prompt


def estimate_token_count(text: str) -> int:
    """
    Estimate how many tokens a piece of text will consume.

    This is a character-count heuristic rather than a real tokenizer; it is
    intended as a fast pre-flight check before calling the API.

    Args:
        text: The text to measure. Leading/trailing whitespace is ignored.

    Returns:
        int: The estimated token count. 0 for empty or whitespace-only
        input, otherwise at least 1.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return 0

    # Classify characters: Hangul syllables (U+AC00-U+D7A3), ASCII letters,
    # and everything else (digits, punctuation, whitespace, other scripts).
    hangul_count = sum(1 for ch in stripped if "가" <= ch <= "힣")
    latin_count = sum(1 for ch in stripped if ch.isascii() and ch.isalpha())
    remainder_count = len(stripped) - hangul_count - latin_count

    # Non-Hangul characters all use the English average; the fractional sum
    # is truncated toward zero before the floor of 1 is applied.
    total = (
        hangul_count / KOREAN_CHARS_PER_TOKEN
        + latin_count / AVERAGE_CHARS_PER_TOKEN
        + remainder_count / AVERAGE_CHARS_PER_TOKEN
    )
    return max(1, int(total))


def validate_prompt_length(prompt: str, max_tokens: int = MAX_PROMPT_TOKENS) -> tuple[bool, int, Optional[str]]:
    """
    Check whether a prompt fits within the token budget.

    Args:
        prompt: The prompt to validate.
        max_tokens: Maximum number of tokens allowed.

    Returns:
        tuple: ``(is_valid, token_count, error_message)``. ``error_message``
        is ``None`` when the prompt is valid. Empty and whitespace-only
        prompts are rejected with a token count of 0.
    """
    # A whitespace-only prompt estimates to 0 tokens and would otherwise slip
    # through as "valid"; treat it the same as an empty prompt.
    if not prompt or not prompt.strip():
        return False, 0, "프롬프트가 비어있습니다."

    token_count = estimate_token_count(prompt)
    if token_count <= max_tokens:
        return True, token_count, None

    error_msg = (
        "프롬프트가 너무 깁니다. "
        f"현재: {token_count}토큰, 최대: {max_tokens}토큰. "
        f"프롬프트를 {token_count - max_tokens}토큰 줄여주세요."
    )
    return False, token_count, error_msg


def truncate_prompt(prompt: str, max_tokens: int = MAX_PROMPT_TOKENS) -> str:
    """
    Shorten a prompt so that its estimated token count fits the budget.

    The prompt is first cut proportionally (with a 10% safety margin), then
    backed off to a nearby word/sentence boundary when one is close enough,
    and finally shrunk further until ``estimate_token_count`` reports a value
    within ``max_tokens``.

    Args:
        prompt: The prompt to truncate.
        max_tokens: Maximum number of tokens allowed.

    Returns:
        str: The prompt unchanged if it already fits; otherwise a stripped
        prefix of it whose estimated token count does not exceed
        ``max_tokens``. An empty string is returned for an empty prompt or
        when the prompt does not fit and ``max_tokens`` is zero or negative.
    """
    if not prompt:
        return ""

    current_tokens = estimate_token_count(prompt)
    if current_tokens <= max_tokens:
        return prompt

    # A non-positive budget cannot hold any text. Without this guard a
    # negative ratio yields a negative slice index (cutting from the END of
    # the string), and a blank prompt would divide by zero below.
    if max_tokens <= 0:
        return ""

    # Proportional first cut, leaving a 10% margin.
    ratio = max_tokens / current_tokens
    target_length = int(len(prompt) * ratio * 0.9)
    truncated = prompt[:target_length]

    # Prefer ending on a boundary: a space, the Korean ending '다', or basic
    # punctuation. The matched boundary character itself is dropped. Only
    # accept the boundary if it keeps more than 80% of the target length.
    cut_point = max(
        truncated.rfind(' '),
        truncated.rfind('다'),
        truncated.rfind('.'),
        truncated.rfind(','),
        truncated.rfind('!'),
    )
    if cut_point > target_length * 0.8:
        truncated = truncated[:cut_point]

    # Final enforcement. Token density can be higher at the start of the text
    # than on average (e.g. Hangul followed by English), so a single
    # corrective cut is not always enough; repeat until the estimate fits.
    final_tokens = estimate_token_count(truncated)
    while final_tokens > max_tokens:
        chars_per_token = len(truncated) / final_tokens
        target_chars = int(max_tokens * chars_per_token * 0.95)
        # Always drop at least one character so the loop must terminate.
        truncated = truncated[:min(target_chars, len(truncated) - 1)]
        final_tokens = estimate_token_count(truncated)

    return truncated.strip()


def get_prompt_stats(prompt: str) -> dict:
    """
    Return summary statistics for a prompt.

    Args:
        prompt: The prompt to analyze.

    Returns:
        dict: Always contains the keys ``character_count``,
        ``estimated_tokens``, ``korean_chars``, ``english_chars``,
        ``other_chars``, ``is_valid``, ``remaining_tokens`` and
        ``max_tokens``. Character counts are taken over the prompt as given
        (including surrounding whitespace). ``remaining_tokens`` is negative
        when the prompt is over budget. ``is_valid`` is False for empty or
        whitespace-only prompts and for prompts over ``MAX_PROMPT_TOKENS``.
    """
    if not prompt:
        return {
            "character_count": 0,
            "estimated_tokens": 0,
            "korean_chars": 0,
            "english_chars": 0,
            "other_chars": 0,
            "is_valid": False,
            "remaining_tokens": MAX_PROMPT_TOKENS,
            # Present on the non-empty path too, so callers can rely on it.
            "max_tokens": MAX_PROMPT_TOKENS,
        }

    char_count = len(prompt)
    korean_chars = len(re.findall(r'[가-힣]', prompt))
    english_chars = len(re.findall(r'[a-zA-Z]', prompt))
    estimated_tokens = estimate_token_count(prompt)

    return {
        "character_count": char_count,
        "estimated_tokens": estimated_tokens,
        "korean_chars": korean_chars,
        "english_chars": english_chars,
        "other_chars": char_count - korean_chars - english_chars,
        # Zero tokens means the prompt is blank, which is reported as invalid
        # just like the empty prompt above.
        "is_valid": 0 < estimated_tokens <= MAX_PROMPT_TOKENS,
        "remaining_tokens": MAX_PROMPT_TOKENS - estimated_tokens,
        "max_tokens": MAX_PROMPT_TOKENS,
    }


# Interface that can be swapped for a real tokenizer implementation later
class TokenCounter:
    """Token counter interface."""

    def __init__(self, tokenizer_name: Optional[str] = None):
        """
        Initialize the token counter.

        Args:
            tokenizer_name: Name of the tokenizer to use (reserved for future
                extension). Falls back to "estimate" when not given.
        """
        self.tokenizer_name = tokenizer_name if tokenizer_name else "estimate"
        logger.info(f"토큰 카운터 초기화: {self.tokenizer_name}")

    def count_tokens(self, text: str) -> int:
        """
        Count the tokens in ``text``.

        Raises:
            NotImplementedError: If the configured tokenizer is anything
                other than "estimate".
        """
        # Only the heuristic estimator exists so far; real tokenizers are a
        # future addition.
        if self.tokenizer_name != "estimate":
            raise NotImplementedError(f"토크나이저 '{self.tokenizer_name}'는 아직 구현되지 않았습니다.")
        return estimate_token_count(text)

    def validate_prompt(self, prompt: str, max_tokens: int = MAX_PROMPT_TOKENS) -> tuple[bool, int, Optional[str]]:
        """Validate a prompt by delegating to ``validate_prompt_length``."""
        result = validate_prompt_length(prompt, max_tokens)
        return result

    def truncate_prompt(self, prompt: str, max_tokens: int = MAX_PROMPT_TOKENS) -> str:
        """Truncate a prompt by delegating to the module-level ``truncate_prompt``."""
        shortened = truncate_prompt(prompt, max_tokens)
        return shortened


# Module-level default token counter instance (uses the "estimate" tokenizer)
default_token_counter = TokenCounter()