commit 7d8e627fe44c7721e26d446f44140ecea9144d57 Author: ened Date: Mon Aug 25 00:39:39 2025 +0900 initial implementation diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..205f181 --- /dev/null +++ b/.env.example @@ -0,0 +1,63 @@ +# ============================================================================== +# GPTEdit Configuration File +# ============================================================================== +# Copy this file to .env and update with your settings + +# ------------------------------------------------------------------------------ +# OpenAI API Configuration (REQUIRED) +# ------------------------------------------------------------------------------ +OPENAI_API_KEY=your-api-key-here # Required: Your OpenAI API key + +# ------------------------------------------------------------------------------ +# Server Configuration +# ------------------------------------------------------------------------------ +LOG_LEVEL=INFO # Logging level: DEBUG, INFO, WARNING, ERROR +MAX_IMAGE_SIZE_MB=4 # Max image size before auto-optimization (MB) +DEFAULT_TIMEOUT=30 # API request timeout in seconds + +# ------------------------------------------------------------------------------ +# Directory Configuration +# ------------------------------------------------------------------------------ +# Input directory for source images to edit +INPUT_PATH=./input_images # Default: ./input_images + # Place images here for edit_image_from_file + # to read and process + +# Output directory for all generated images and files +GENERATED_IMAGES_PATH=./generated_images # Default: ./generated_images + # Structure: + # ├── {base_name}_000.png - Original input images + # ├── {base_name}_001.png - Edited output images + # └── {base_name}_001.json - JSON parameter files + +# File naming configuration +OUTPUT_FILENAME_PREFIX=gptimage1 # Prefix for output files + # Example workflow: + # 1. Place image.jpg in INPUT_PATH + # 2. 
Call edit_image_from_file("image.jpg", "make it brighter") + # 3. Results in GENERATED_IMAGES_PATH: + # - gptimage1_20250824_143022_000.png (original) + # - gptimage1_20250824_143022_001.png (edited) + # - gptimage1_20250824_143022_001.json (parameters) + +# Save options +SAVE_ORIGINALS=true # Copy input images to GENERATED_IMAGES_PATH as {base_name}_000.png +SAVE_PARAMETERS=true # Save edit parameters as JSON files + +# ------------------------------------------------------------------------------ +# Feature Flags +# ------------------------------------------------------------------------------ +ENABLE_AUTO_MASK=false # Auto-generate masks (future feature) +ENABLE_AUTO_OPTIMIZE=true # Auto-convert large images to WebP + +# ------------------------------------------------------------------------------ +# Advanced Settings (Optional) +# ------------------------------------------------------------------------------ +# WebP Optimization Settings +WEBP_QUALITY_MIN=60 # Minimum WebP quality (1-100) +WEBP_QUALITY_MAX=95 # Maximum WebP quality (1-100) +WEBP_METHOD=6 # WebP compression method (0-6, 6=best) + +# Image Processing +AUTO_RESIZE_THRESHOLD=0.5 # Min scale factor when resizing (0.1-1.0) +PRESERVE_METADATA=false # Keep EXIF data in processed images diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..94c46c9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,75 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Virtual environments +venv/ +ENV/ +env/ +.venv + +# IDEs +.idea/ 
+.vscode/ +*.swp +*.swo +*~ +.DS_Store + +# Environment variables +.env +*.env + +# Log files +*.log +logs/ + +# Generated images +generated_images/ +edited_images/ +temp/ + +# Test outputs +test_outputs/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2399eca --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,615 @@ +# GPTEdit MCP Server - 설계 및 개발 가이드 + +## 📋 프로젝트 개요 +GPTEdit는 OpenAI의 이미지 편집 API를 MCP(Model Context Protocol) 서버로 구현한 프로젝트입니다. +이 문서는 향후 유사한 MCP 서버 개발 시 참고할 수 있는 설계 원칙과 구현 가이드를 제공합니다. + +## 🏗️ 핵심 설계 원칙 + +### 1. 파일 구조 단순화 +**✅ 단일 디렉토리 구조 채택:** +``` +generated_images/ +├── gptimage1_123456_20250824_143022_000.png # 입력 파일 +├── gptimage1_123456_20250824_143022_001.png # 첫 번째 출력 +├── gptimage1_123456_20250824_143022_001.json # 첫 번째 출력 파라미터 +├── gptimage1_123456_20250824_143022_mask.png # 마스크 파일 (필요시) +└── ... +``` + +### 2. 파일 명명 규칙 +#### Base Name 형식 +`gptimage1_{seed}_{yyyymmdd}_{hhmmss}` +- `gptimage1`: 고정 prefix (환경변수로 변경 가능) +- `{seed}`: 6자리 랜덤 시드 (세션 단위로 유지) +- `{yyyymmdd}`: 날짜 (예: 20250824) +- `{hhmmss}`: 시간 (예: 143022) + +#### 파일 타입별 명명 +- **입력 파일**: `{base_name}_000.{ext}` (000은 항상 입력을 의미) +- **출력 파일**: `{base_name}_001.png`, `{base_name}_002.png`, ... +- **파라미터 파일**: `{base_name}_001.json`, `{base_name}_002.json`, ... +- **마스크 파일**: `{base_name}_mask.{ext}` + +### 3. 세션 기반 시드 관리 +```python +class ToolHandlers: + def __init__(self, config): + self.current_seed = None # 세션 시드 + + def _get_or_create_seed(self): + if self.current_seed is None: + self.current_seed = random.randint(0, 999999) + return self.current_seed + + def _reset_seed(self): + self.current_seed = None # 작업 완료 후 리셋 +``` + +## ⚠️ API 수정 시 필수 체크리스트 + +### 함수명이나 파라미터 변경 시 반드시 확인할 파일들 +MCP 서버는 Claude와 통신하는 양방향 시스템입니다. **한쪽만 수정하면 작동하지 않습니다!** + +#### 1. 서버 측 (MCP Server) +- `src/server/models.py` - 도구 정의 (Tool definitions) +- `src/server/handlers.py` - 도구 실행 로직 (Handler implementations) +- `src/server/mcp_server.py` - 도구 등록 및 라우팅 (Tool registration) + +#### 2. 
커넥터 측 (API Connector) +- `src/connector/openai_client.py` - API 호출 로직 +- `src/connector/config.py` - 설정 및 파라미터 관리 +- `src/connector/models.py` - 데이터 모델 정의 (있는 경우) + +#### 3. 유틸리티 +- `src/utils/` - 공통 함수들 (파라미터 타입 변경 시) + +#### 4. 문서 +- `README.md` - 사용 예제 +- `TECHNICAL_SPECS.md` - API 스펙 +- `MCP_CONNECTOR_GUIDE.md` - 연동 가이드 + +### 수정 순서 (중요!) + +1. **모델 정의 수정** (`models.py`) + ```python + # 예: edit_simple → edit_image + def get_edit_image_tool() -> Tool: + return Tool( + name="edit_image", # 이름 변경 + inputSchema={ + "properties": { + "input_image_b64": {...} # 파라미터 변경 + } + } + ) + ``` + +2. **핸들러 수정** (`handlers.py`) + ```python + async def handle_edit_image(self, arguments): + # 새 파라미터 처리 로직 + if 'input_image_b64' not in arguments: # 변경된 파라미터 + ``` + +3. **서버 라우팅 수정** (`mcp_server.py`) + ```python + if name == "edit_image": # 변경된 이름 + return await self.handlers.handle_edit_image(arguments) + ``` + +4. **테스트 실행** + ```bash + # 단독 테스트 + python tests/test_server.py + + # Claude 연동 테스트 + python main.py + # Claude Desktop에서 도구 호출 테스트 + ``` + +### 자주 발생하는 실수 + +❌ **하지 말아야 할 것:** +- 한쪽만 수정하고 테스트 +- 프롬프트 이름만 바꾸고 핸들러는 그대로 둠 +- 파라미터 이름 변경 시 validation 로직 미수정 + +✅ **반드시 해야 할 것:** +- 모든 관련 파일 동시 수정 +- 변경 후 즉시 테스트 +- 문서 업데이트 + +### 실제 수정 예시: image_path → input_image_b64 + +#### 변경 이유 +Claude가 업로드된 이미지를 바로 처리할 수 있도록 Base64 입력으로 변경 + +#### 수정한 파일들 + +1. **`src/server/models.py`** - 도구 정의 + ```python + # Before + "image_path": { + "type": "string", + "description": "Path to the image file" + } + + # After + "input_image_b64": { + "type": "string", + "description": "Base64 encoded input image data" + } + ``` + +2. 
**`src/server/handlers.py`** - 핸들러 로직 + ```python + # Before + image_path = arguments.get('image_path') + if not image_path or not Path(image_path).exists(): + return [TextContent(text="Image not found")] + + # After + if 'input_image_b64' not in arguments: + return [TextContent(text="input_image_b64 is required")] + + # Save b64 to temp file + image_path = self._save_b64_to_temp_file( + arguments['input_image_b64'], + base_name, + 0, + "input" + ) + ``` + +3. **`src/utils/image_utils.py`** - 새 함수 추가 + ```python + def decode_image_base64(base64_str: str) -> bytes: + """Decode base64 string to image data""" + return base64.b64decode(base64_str) + ``` + +### 디버깅 팁 + +#### 에러 발생 시 확인 순서 + +1. **로그 파일 확인** + ```bash + tail -f gptedit.log + ``` + +2. **Claude Desktop 에러 메시지** + - "Method not found" → 함수명 불일치 + - "Invalid arguments" → 파라미터 문제 + - "Server disconnected" → Python 크래시 + +3. **단계별 테스트** + ```python + # tests/test_api_change.py + async def test_new_parameter(): + handlers = ToolHandlers(config) + result = await handlers.handle_edit_image({ + "input_image_b64": test_b64, + "prompt": "test" + }) + assert result[0].type == "text" + ``` + +4. **JSON-RPC 통신 확인** + ```python + # 로깅 추가로 통신 내용 확인 + logger.debug(f"Received: {json.dumps(request, indent=2)}") + logger.debug(f"Sending: {json.dumps(response, indent=2)}") + ``` + +### API 변경 테스트 체크리스트 + +☑️ **1단계: 단독 테스트** +```bash +# Python 직접 테스트 +python -c "from src.server.models import MCPToolDefinitions; print([t.name for t in MCPToolDefinitions.get_all_tools()])" +# 결과: ['edit_image', 'edit_with_mask', ...] 확인 +``` + +☑️ **2단계: 서버 실행 테스트** +```bash +python main.py +# 로그에 "Tool called: edit_image" 확인 +``` + +☑️ **3단계: Claude Desktop 테스트** +1. Claude Desktop 재시작 +2. "사용 가능한 도구가 무엇인가요?" 질문 +3. 변경된 도구명 확인 +4. 
실제 호출 테스트 + +☑️ **4단계: 파라미터 테스트** +```python +# tests/test_parameters.py +import asyncio +from src.server.handlers import ToolHandlers +from src.connector import Config + +async def test(): + handlers = ToolHandlers(Config()) + # 새 파라미터로 테스트 + result = await handlers.handle_edit_image({ + "input_image_b64": "base64_data_here", + "prompt": "test prompt" + }) + print(result) + +asyncio.run(test()) +``` + +## 🔧 MCP 서버 구현 체크리스트 + +### 필수 MCP 메서드 +MCP 서버는 다음 메서드들을 반드시 구현해야 합니다: + +1. **`list_tools()`** - 사용 가능한 도구 목록 반환 +2. **`call_tool()`** - 도구 실행 +3. **`list_prompts()`** - 프롬프트 템플릿 목록 반환 +4. **`get_prompt()`** - 특정 프롬프트 템플릿 반환 +5. **`list_resources()`** - 리소스 목록 반환 (없으면 빈 리스트) + +### 로깅 설정 +```python +# stdout과 충돌 방지를 위해 stderr 사용 +logging.basicConfig( + level=logging.INFO, + handlers=[ + logging.FileHandler('gptedit.log', encoding='utf-8'), + logging.StreamHandler(sys.stderr) # stderr 사용! + ] +) +``` + +## 📁 프로젝트 구조 + +``` +gptedit/ +├── src/ +│ ├── connector/ # API 연결 모듈 +│ │ ├── config.py # 설정 관리 및 파일명 생성 +│ │ └── openai_client.py +│ ├── server/ # MCP 서버 +│ │ ├── mcp_server.py # MCP 핸들러 등록 +│ │ ├── handlers.py # 도구 구현 +│ │ └── models.py # 도구 정의 +│ └── utils/ # 유틸리티 +├── generated_images/ # 모든 이미지 저장 (단일 디렉토리) +├── temp/ # 임시 파일 +├── .env # 환경 변수 +└── main.py # 진입점 +``` + +## 🔑 환경 변수 설정 + +`.env` 파일 예시: +```bash +# API 설정 +OPENAI_API_KEY=sk-xxxxx +OPENAI_ORGANIZATION=org-xxxxx # 선택사항 + +# 서버 설정 +LOG_LEVEL=INFO +MAX_IMAGE_SIZE_MB=4 +DEFAULT_TIMEOUT=30 + +# 파일 명명 설정 +OUTPUT_FILENAME_PREFIX=gptimage1 # 파일명 prefix + +# 기능 플래그 +ENABLE_AUTO_OPTIMIZE=true +SAVE_ORIGINALS=true +SAVE_PARAMETERS=true + +# 경로 설정 (기본값 사용 권장) +# GENERATED_IMAGES_PATH=./generated_images +``` + +## 🎯 Base64 입력 지원 (2025-01-17 업데이트) + +### 변경사항 +Claude와의 통합을 개선하기 위해 `image_path` 대신 `image_b64`를 사용하도록 변경했습니다. 
+ +#### 이전 방식 (image_path 사용) +```python +{ + "image_path": "/path/to/image.png", + "prompt": "edit the image" +} +``` + +#### 현재 방식 (input_image_b64 사용) +```python +{ + "input_image_b64": "base64_encoded_string_here", # PNG, JPEG, WebP 등 지원 + "prompt": "edit the image", + "background": "transparent", # 선택사항: "transparent" 또는 "opaque" + "save_to_file": true # 선택사항: 파일로 저장 여부 +} +``` + +### 장점 +1. **즉시 사용 가능**: Claude가 업로드된 이미지를 바로 처리 +2. **경로 문제 해결**: 파일 시스템 경로 찾기 불필요 +3. **직접 통합**: Claude의 이미지 인식과 직접 연동 + +### 구현 패턴 +```python +def _save_b64_to_temp_file(self, b64_data: str, base_name: str, index: int, file_type: str = "input") -> str: + """Base64 데이터를 파일로 저장 (다양한 포맷 지원)""" + # 1. Base64 디코딩 + image_data = decode_image_base64(b64_data) + + # 2. 이미지 포맷 자동 감지 + with Image.open(io.BytesIO(image_data)) as img: + format_ext = img.format.lower() # PNG, JPEG, WEBP 등 감지 + + # 마스크는 PNG로 변환 (OpenAI API 요구사항) + if file_type == "mask" and format_ext != 'png': + buffer = io.BytesIO() + img.save(buffer, format='PNG') + image_data = buffer.getvalue() + format_ext = 'png' + + # 3. 파일명 생성 (원본 포맷 유지) + if file_type == "mask": + filename = f"{base_name}_mask.{format_ext}" + else: + filename = f"{base_name}_{index:03d}.{format_ext}" + + # 4. generated_images에 저장 + file_path = self.config.generated_images_path / filename + save_image(image_data, str(file_path)) + + return str(file_path) +``` + +### 워크플로우 +1. Claude가 이미지를 base64로 전송 (원본 포맷 유지: PNG, JPEG, WebP 등) +2. 포맷을 자동 감지하여 `generated_images`에 저장 (입력 추적용) +3. 저장된 파일 경로로 OpenAI API 호출 +4. 결과를 저장하고 base64로 반환 + +### 지원 이미지 포맷 +- **입력 이미지**: PNG, JPEG, WebP, GIF, BMP, TIFF +- **마스크 이미지**: PNG (다른 포맷은 자동 변환) +- **출력 이미지**: PNG (OpenAI API 기본값) + +## 🛠️ 구현 패턴 + +### 1. 
Base Name 생성 +```python +def generate_base_name(seed: Optional[int] = None) -> str: + """gptimage1_{seed}_{yyyymmdd}_{hhmmss} 형식으로 생성""" + if seed is None: + seed = random.randint(0, 999999) + + now = datetime.now() + date_str = now.strftime("%Y%m%d") + time_str = now.strftime("%H%M%S") + + return f"gptimage1_{seed}_{date_str}_{time_str}" +``` + +### 2. 파일 저장 패턴 +```python +# Base name 생성 +seed = self._get_or_create_seed() +base_name = self.config.generate_base_name(seed) + +# 입력 파일 저장 +input_path = self.config.get_output_path(base_name, 0, 'png') +shutil.copy2(original_image, input_path) + +# 출력 파일 저장 +output_path = self.config.get_output_path(base_name, 1, 'png') +save_image(edited_image, output_path) + +# 파라미터 저장 +json_path = self.config.get_output_path(base_name, 1, 'json') +with open(json_path, 'w') as f: + json.dump(params, f, indent=2) +``` + +### 3. 작업 플로우 +```python +async def handle_edit_image(self, arguments): + try: + # 1. 시드 생성/가져오기 + seed = self._get_or_create_seed() + base_name = self.config.generate_base_name(seed) + + # 2. 입력 파일 저장 (000) + input_path = self.config.get_output_path(base_name, 0, ext) + + # 3. 처리 실행 + response = await self.client.edit_image(request) + + # 4. 출력 파일 저장 (001) + output_path = self.config.get_output_path(base_name, 1, 'png') + + # 5. 파라미터 저장 (001.json) + json_path = self.config.get_output_path(base_name, 1, 'json') + + finally: + # 6. 
세션 종료 시 시드 리셋 + self._reset_seed() +``` + +## 📝 저장되는 파라미터 구조 + +```json +{ + "base_name": "gptimage1_123456_20250824_143022", + "seed": 123456, + "timestamp": "2025-08-24T14:30:22.123456", + "prompt": "make the image more colorful", + "background": "transparent", + "input_image": "generated_images/gptimage1_123456_20250824_143022_000.png", + "input_size": [1024, 768], + "output_size": [1024, 1024], + "execution_time": 3.45, + "optimization": { + "optimized": true, + "original_size_mb": 5.2, + "final_size_mb": 3.8, + "format_used": "PNG" + }, + "token_stats": { + "estimated_tokens": 45, + "token_limit": 1000, + "usage_percentage": 4.5 + }, + "config": { + "model": "gpt-image-1", + "quality": "high", + "api_version": "gpt-image-1" + } +} +``` + +## 🚀 실행 및 테스트 + +### 서버 실행 +```bash +python main.py +``` + +### Claude Desktop 설정 +`claude_desktop_config.json`: +```json +{ + "mcpServers": { + "gptedit": { + "command": "python", + "args": ["D:/Project/little-fairy/gptedit/main.py"] + } + } +} +``` + +## ⚠️ 일반적인 문제 해결 + +### 1. "Method not found" 에러 +- `list_prompts()`, `list_resources()` 메서드 구현 확인 +- MCP 서버 이름이 단순한지 확인 (예: "gptedit") + +### 2. JSON 파싱 에러 +- 로그가 stdout으로 출력되지 않도록 stderr 사용 +- print() 대신 logger 사용 + +### 3. 파일 명명 충돌 +- 타임스탬프와 시드 조합으로 유니크성 보장 +- 같은 세션 내에서는 동일 시드 사용 + +## 📚 핵심 교훈 + +1. **일관된 명명**: `prefix_seed_date_time_number` 패턴 고수 +2. **세션 관리**: 한 작업 세션 동안 동일 시드 유지 +3. **단순한 구조**: 모든 파일을 한 디렉토리에 저장 +4. **추적 가능성**: 파일명만으로 언제, 어떤 작업인지 파악 가능 +5. 
**자동화**: Base name 생성과 파일 경로 관리 자동화 + +## 🔄 다중 편집 시나리오 + +### 같은 이미지를 여러 번 편집 +``` +# 첫 번째 편집 (seed: 123456) +gptimage1_123456_20250824_143022_000.png # 원본 +gptimage1_123456_20250824_143022_001.png # 첫 편집 결과 + +# 두 번째 편집 (새 seed: 789012) +gptimage1_789012_20250824_143055_000.png # 원본 (복사) +gptimage1_789012_20250824_143055_001.png # 두 번째 편집 결과 +``` + +### 배치 편집 +``` +# 배치 작업 (seed: 456789) +gptimage1_456789_20250824_144512_001.png # 첫 번째 이미지 결과 +gptimage1_456789_20250824_144512_002.png # 두 번째 이미지 결과 +gptimage1_456789_20250824_144512_003.png # 세 번째 이미지 결과 +``` + +## ⚠️ API 변경 시 놓치기 쉬운 부분들 + +### 1. 프롬프트 정의 (list_prompts) +함수명 변경 시 `mcp_server.py`의 프롬프트 정의도 함께 수정해야 합니다: +```python +@self.server.list_prompts() +async def handle_list_prompts(): + prompts = [ + Prompt( + name="edit_image", # ← 이것도 변경 필요! + description="Edit an image with AI", +``` + +### 2. 배치 처리 (batch_edit) +`batch_edit`는 여전히 `image_path`를 사용할 수 있습니다. +두 함수의 일관성을 유지하려면 함께 수정해야 합니다. + +### 3. 유틸리티 함수 +파라미터 타입 변경 시 관련 유틸리티 함수들도 확인: +- `validation.py`: 파라미터 검증 함수 +- `image_utils.py`: 이미지 처리 함수 +- `token_utils.py`: 토큰 계산 함수 + +### 4. 테스트 파일 +`tests/` 폴더의 모든 테스트 파일들도 업데이트: +- 예시 데이터 +- 함수 호출 +- 기대값 검증 + +### 5. 에러 메시지 +에러 메시지에서도 변경된 파라미터명 사용: +```python +# Before +return [TextContent(text="image_path is required")] + +# After +return [TextContent(text="input_image_b64 is required")] +``` + +### 6. 로깅 메시지 +로깅에서도 일관성 유지: +```python +logger.info(f"Processing edit_image with prompt: {prompt}") +# 함수명이 로그에도 반영되어야 함 +``` + +### 7. 문서의 코드 예시 +문서에 있는 코드 예시들도 모두 업데이트: +- README.md의 사용 예시 +- TECHNICAL_SPECS.md의 API 스펙 +- CLAUDE.md의 구현 패턴 + +### 8. 통합 테스트 체크리스트 + +API 변경 후 반드시 실행할 통합 테스트: + +☑️ **MCP 서버 테스트** +- [ ] `python main.py` 실행 확인 +- [ ] 로그에 에러 없는지 확인 +- [ ] Ctrl+C로 정상 종료 확인 + +☑️ **Claude Desktop 테스트** +- [ ] Claude Desktop 완전 종료 후 재시작 +- [ ] "사용 가능한 도구가 무엇인가요?" 
질문 +- [ ] 변경된 도구명 표시 확인 +- [ ] 실제 이미지 업로드 테스트 +- [ ] 결과 파일 생성 확인 + +☑️ **파일 시스템 테스트** +- [ ] `generated_images/` 폴더에 파일 생성 확인 +- [ ] 파일명 형식 확인 (base_name 패턴) +- [ ] JSON 파라미터 파일 저장 확인 + +--- + +이 가이드를 따르면 일관되고 추적 가능한 파일 구조를 가진 MCP 서버를 개발할 수 있습니다. + +**💡 기억하세요: MCP 서버는 양방향 통신입니다. 한쪽만 수정하면 작동하지 않습니다!** diff --git a/CLEANUP_SUMMARY.md b/CLEANUP_SUMMARY.md new file mode 100644 index 0000000..2dfde17 --- /dev/null +++ b/CLEANUP_SUMMARY.md @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +""" +Project Cleanup Summary for GPTEdit +Generated after cleanup operation +""" + +import os +from pathlib import Path +from datetime import datetime + +def generate_cleanup_summary(): + """Generate summary of cleaned files""" + + print("🧹 GPTEdit Project Cleanup Summary") + print("=" * 50) + print(f"📅 Cleanup Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + print() + + print("✅ REMOVED FILES:") + removed_files = [ + "src/connector/openai_client_backup.py.disabled (contained problematic response_format)", + "tests/image_utils_backup.py (outdated backup)", + "debug_gptedit.py (debug script)", + "debug_path.py (debug script)", + "quick_test.py (test script)", + "replay_edit.py (utility script)", + "test_api_key.py (standalone test)", + "test_size_optimization.py (standalone test)", + "test_verification.py (standalone test)", + "temp/fairy_image.png (old temp file)", + "clear_cache.py (cleanup utility)", + "search_response_format.py (diagnostic utility)" + ] + + for item in removed_files: + print(f" 🗑️ {item}") + + print() + print("📁 KEPT ESSENTIAL FILES:") + essential_files = [ + "main.py (main server entry point)", + "requirements.txt (dependencies)", + ".env / .env.example (configuration)", + "README.md (documentation)", + "src/ (core source code)", + "tests/ (unit tests)", + "input_images/ (input directory)", + "generated_images/ (output directory)", + "temp/imagen4.png (current working file)", + "temp/optimized_imagen4.png (current optimized file)" + ] + + for item in 
essential_files: + print(f" 📄 {item}") + + print() + print("🎯 NEXT STEPS:") + print(" 1. Restart the MCP server completely") + print(" 2. Test image editing functionality") + print(" 3. Verify no more response_format errors") + print() + print("✅ PROJECT CLEANUP COMPLETED!") + +if __name__ == "__main__": + generate_cleanup_summary() diff --git a/MCP_CONNECTOR_GUIDE.md b/MCP_CONNECTOR_GUIDE.md new file mode 100644 index 0000000..18b6d57 --- /dev/null +++ b/MCP_CONNECTOR_GUIDE.md @@ -0,0 +1,157 @@ +# GPTEdit MCP Connector 설정 가이드 + +## 📍 Claude Desktop 설정 파일 위치 +- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json` +- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json` +- **Linux**: `~/.config/claude/claude_desktop_config.json` + +## ⚙️ MCP Connector 설정 + +### 기본 설정 (Python 직접 실행) +```json +{ + "mcpServers": { + "gptedit": { + "command": "python", + "args": ["D:\\Project\\little-fairy\\gptedit\\main.py"], + "env": { + "PYTHONPATH": "D:\\Project\\little-fairy\\gptedit" + } + } + } +} +``` + +### 가상환경 사용 시 +```json +{ + "mcpServers": { + "gptedit": { + "command": "D:\\Project\\little-fairy\\gptedit\\venv\\Scripts\\python.exe", + "args": ["D:\\Project\\little-fairy\\gptedit\\main.py"], + "env": { + "PYTHONPATH": "D:\\Project\\little-fairy\\gptedit" + } + } + } +} +``` + +### 배치 파일 사용 시 +```json +{ + "mcpServers": { + "gptedit": { + "command": "cmd", + "args": ["/c", "D:\\Project\\little-fairy\\gptedit\\run.bat"] + } + } +} +``` + +## 🔧 환경 변수 설정 (선택사항) + +MCP connector에서 직접 환경 변수를 설정할 수도 있습니다: + +```json +{ + "mcpServers": { + "gptedit": { + "command": "python", + "args": ["D:\\Project\\little-fairy\\gptedit\\main.py"], + "env": { + "PYTHONPATH": "D:\\Project\\little-fairy\\gptedit", + "OPENAI_API_KEY": "sk-xxxxx", + "OUTPUT_FILENAME_PREFIX": "gptimage1", + "GENERATED_IMAGES_PATH": "D:\\Project\\little-fairy\\gptedit\\generated_images", + "LOG_LEVEL": "INFO", + "MAX_IMAGE_SIZE_MB": "4", + "DEFAULT_TIMEOUT": "30", + "ENABLE_AUTO_OPTIMIZE": 
"true", + "SAVE_ORIGINALS": "true", + "SAVE_PARAMETERS": "true" + } + } + } +} +``` + +## 📝 다중 MCP 서버 설정 + +여러 MCP 서버를 함께 사용하는 경우: + +```json +{ + "mcpServers": { + "gptedit": { + "command": "python", + "args": ["D:\\Project\\little-fairy\\gptedit\\main.py"] + }, + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "D:\\Project\\little-fairy"] + }, + "imagen4": { + "command": "python", + "args": ["D:\\Project\\imagen4\\main.py"] + } + } +} +``` + +## 🚨 주의사항 + +1. **경로 구분자**: Windows에서는 `\\` 또는 `/` 모두 사용 가능 +2. **Python 경로**: 시스템 Python 또는 가상환경 Python 경로 확인 +3. **권한**: 스크립트 실행 권한 확인 +4. **로그 확인**: 문제 발생 시 `gptedit.log` 파일 확인 + +## 🔍 연결 테스트 + +Claude Desktop에서 다음 명령으로 연결 확인: + +1. **도구 목록 확인**: + - Claude에게 "What tools are available?" 물어보기 + - `edit_image`, `edit_with_mask`, `batch_edit` 등이 나타나야 함 + +2. **간단한 테스트**: + ``` + "Validate the image at D:/test.png" + ``` + +3. **로그 확인**: + - `D:\Project\little-fairy\gptedit\gptedit.log` 파일에서 연결 로그 확인 + +## 🔄 서버 재시작 + +설정 변경 후: +1. Claude Desktop 완전 종료 (시스템 트레이 확인) +2. Claude Desktop 재시작 +3. 새 대화 시작 + +## 📋 체크리스트 + +- [ ] `.env` 파일에 OPENAI_API_KEY 설정됨 +- [ ] `generated_images/` 디렉토리 존재 +- [ ] Python 및 필요 패키지 설치됨 +- [ ] `claude_desktop_config.json` 파일 설정됨 +- [ ] Claude Desktop 재시작됨 + +## 🐛 문제 해결 + +### "Server disconnected" 에러 +1. Python 경로 확인 +2. 의존성 설치 확인: `pip install -r requirements.txt` +3. `.env` 파일 확인 + +### "Method not found" 에러 +1. 최신 코드인지 확인 +2. `list_prompts`, `list_resources` 메서드 구현 확인 + +### 파일을 찾을 수 없음 +1. `generated_images/` 디렉토리 생성 확인 +2. 파일 권한 확인 + +--- + +이 가이드를 따라 Claude Desktop과 GPTEdit MCP 서버를 연결하세요. diff --git a/README.md b/README.md new file mode 100644 index 0000000..b6a65ca --- /dev/null +++ b/README.md @@ -0,0 +1,213 @@ +# GPTEdit - OpenAI Image Editing MCP Server + +GPTEdit는 OpenAI의 이미지 편집 API를 MCP(Model Context Protocol) 서버로 구현한 프로젝트입니다. +Claude Desktop과 연동하여 자연어로 이미지를 편집할 수 있습니다. + +## 🚀 Quick Start + +### 1. 
설치 +```bash +# 저장소 클론 +git clone https://github.com/yourusername/gptedit.git +cd gptedit + +# 의존성 설치 +pip install -r requirements.txt + +# 환경 변수 설정 +copy .env.example .env +# .env 파일을 열어 OPENAI_API_KEY 설정 +``` + +### 2. 실행 +```bash +# Windows - 배치 파일 +run.bat + +# Windows - PowerShell +.\run.ps1 + +# 직접 실행 +python main.py +``` + +### 3. Claude Desktop 연동 +`%APPDATA%\Claude\claude_desktop_config.json` 파일 편집: +```json +{ + "mcpServers": { + "gptedit": { + "command": "python", + "args": ["D:\\Project\\little-fairy\\gptedit\\main.py"] + } + } +} +``` + +자세한 설정은 [MCP Connector Guide](MCP_CONNECTOR_GUIDE.md) 참조 + +## 📁 디렉토리 구조 + +### 입력 및 출력 디렉토리 +- **INPUT_PATH**: `input_images/` - 편집할 원본 이미지 저장 +- **GENERATED_IMAGES_PATH**: `generated_images/` - 편집 결과 저장 + +### 파일 구조 예시 +``` +input_images/ # 원본 이미지 보관 +├── photo.jpg +├── portrait.png +└── mask.png + +generated_images/ # 편집 결과물 +├── gptimage1_20250824_143022_000.png # 원본 복사본 +├── gptimage1_20250824_143022_001.png # 편집된 출력 +└── gptimage1_20250824_143022_001.json # 편집 파라미터 +``` + +### 파일명 형식 +- **Base Name**: `gptimage1_{yyyymmdd}_{hhmmss}` +- **원본**: `{base_name}_000.png` (INPUT_PATH에서 복사) +- **편집본**: `{base_name}_001.png`, `{base_name}_002.png`, ... +- **파라미터**: `{base_name}_001.json`, `{base_name}_002.json`, ... + +## 🛠️ 사용 가능한 도구 + +### 1. edit_image_from_file (권장 ⭐) +INPUT_PATH에서 이미지를 읽어 편집하고 GENERATED_IMAGES_PATH에 저장합니다. +``` +예: edit_image_from_file("photo.jpg", "Make the sky more dramatic") +사용법: +1. photo.jpg를 input_images/에 배치 +2. 명령 실행 +3. generated_images/에 결과 생성 +``` + +### 2. edit_with_mask_from_file (권장 ⭐) +INPUT_PATH에서 이미지와 마스크를 읽어 선택적 편집을 수행합니다. +``` +예: edit_with_mask_from_file("photo.jpg", "mask.png", "Replace the background") +사용법: +1. photo.jpg와 mask.png를 input_images/에 배치 +2. 명령 실행 +3. generated_images/에 결과 생성 +``` + +### 3. edit_image +Base64 형식의 이미지를 받아 지정된 프롬프트로 편집합니다. +``` +예: "Make the sky more dramatic" (image_data_b64 포함) +``` + +### 4. edit_with_mask +마스크를 사용하여 특정 영역만 편집합니다. 
+``` +예: "Replace the background using mask" (image_data_b64 + mask_data_b64 포함) +``` + +### 4. batch_edit +여러 이미지를 한 번에 편집합니다. (최대 16개) +``` +예: "Apply vintage filter to all images" +``` + +### 5. validate_image +이미지가 편집 가능한지 검증합니다. +``` +예: "Check if image.png is valid for editing" +``` + +### 6. create_mask_from_alpha +PNG 알파 채널에서 마스크를 생성합니다. +``` +예: "Create mask from transparent areas" +``` + +### 7. move_temp_to_output +temp 디렉토리에서 output 디렉토리로 파일을 이동합니다. +``` +예: 수동 파일 관리 +``` + +## ⚙️ 환경 변수 설정 + +`.env` 파일: +```bash +# 필수 +OPENAI_API_KEY=sk-xxxxx + +# 디렉토리 설정 +INPUT_PATH=./input_images # 원본 이미지 디렉토리 +GENERATED_IMAGES_PATH=./generated_images # 결과 저장 디렉토리 + +# 선택사항 +OPENAI_ORGANIZATION=org-xxxxx +OUTPUT_FILENAME_PREFIX=gptimage1 +MAX_IMAGE_SIZE_MB=4 +DEFAULT_TIMEOUT=30 +ENABLE_AUTO_OPTIMIZE=true +SAVE_ORIGINALS=true +SAVE_PARAMETERS=true +LOG_LEVEL=INFO +``` + +전체 설정 옵션은 [Setup Guide](SETUP_GUIDE.md) 참조 + +## 📊 주요 기능 + +- ✅ **자동 이미지 최적화**: 4MB 이상 이미지 자동 압축 +- ✅ **토큰 관리**: 프롬프트 길이 자동 조절 +- ✅ **배치 처리**: 최대 16개 이미지 동시 편집 +- ✅ **마스크 지원**: 특정 영역만 선택적 편집 +- ✅ **파라미터 저장**: 모든 편집 내역 JSON으로 저장 +- ✅ **투명 배경**: transparent/opaque 배경 선택 + +## 🔍 문제 해결 + +### Server disconnected 오류 +1. Python 경로 확인 +2. `.env` 파일의 API 키 확인 +3. `requirements.txt` 설치 확인 + +### Method not found 오류 +1. 최신 코드 확인 +2. Claude Desktop 재시작 + +### 파일을 찾을 수 없음 +1. `generated_images/` 디렉토리 확인 +2. 
파일 권한 확인 + +자세한 문제 해결은 [Setup Guide](SETUP_GUIDE.md#troubleshooting) 참조 + +## 📚 문서 + +- **[CLAUDE.md](CLAUDE.md)** - MCP 서버 개발 가이드 및 설계 원칙 +- **[TECHNICAL_SPECS.md](TECHNICAL_SPECS.md)** - 기술 사양 및 API 상세 정보 +- **[MCP_CONNECTOR_GUIDE.md](MCP_CONNECTOR_GUIDE.md)** - Claude Desktop 연동 가이드 +- **[SETUP_GUIDE.md](SETUP_GUIDE.md)** - 상세 설치 및 설정 가이드 + +## 🔧 기술 사양 + +- **모델**: OpenAI GPT-Image-1 +- **지원 크기**: 256x256, 512x512, 1024x1024, 1024x1536, 1536x1024 +- **입력 형식**: PNG, JPEG, WebP, GIF, BMP +- **최대 크기**: 4MB (자동 최적화 지원) +- **토큰 제한**: 1000 토큰 (1024x1024 기준) + +자세한 사양은 [Technical Specifications](TECHNICAL_SPECS.md) 참조 + +## 📄 라이선스 + +MIT License + +## 🤝 기여 + +Pull Request 환영합니다! + +## 📞 지원 + +Issues 탭에서 문제를 보고해주세요. + +--- + +Made with ❤️ for Claude Desktop MCP ecosystem diff --git a/SETUP_GUIDE.md b/SETUP_GUIDE.md new file mode 100644 index 0000000..b2651fd --- /dev/null +++ b/SETUP_GUIDE.md @@ -0,0 +1,222 @@ +# GPTEdit MCP Server Setup Guide + +## 🚀 Quick Setup + +### 1. Install GPTEdit + +```bash +# Clone or navigate to the project +cd D:\Project\little-fairy\gptedit + +# Install dependencies +pip install -r requirements.txt +``` + +### 2. Configure API Key + +Create a `.env` file in the project directory: + +```env +# Required +OPENAI_API_KEY=sk-your-api-key-here + +# Directory paths (optional) +INPUT_PATH=./input_images +GENERATED_IMAGES_PATH=./generated_images +``` + +### 3. Configure Claude Desktop + +Add GPTEdit to your Claude Desktop configuration file: + +**Windows:** `%APPDATA%\Claude\claude_desktop_config.json` +**macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json` + +```json +{ + "mcpServers": { + "gptedit": { + "command": "python", + "args": ["D:\\Project\\little-fairy\\gptedit\\main.py"] + } + } +} +``` + +### 4. Restart Claude Desktop + +After saving the configuration, restart Claude Desktop completely. 
+ +## ⚙️ Configuration Options + +### Environment Variables + +Create a `.env` file in the project root: + +```env +# Required +OPENAI_API_KEY=sk-your-api-key-here + +# Directory Configuration (optional) +INPUT_PATH=./input_images # Source images directory +GENERATED_IMAGES_PATH=./generated_images # Output directory + +# File naming and processing (optional with defaults) +OUTPUT_FILENAME_PREFIX=gptimage1 +MAX_IMAGE_SIZE_MB=4 +DEFAULT_TIMEOUT=30 +ENABLE_AUTO_OPTIMIZE=true +SAVE_ORIGINALS=true +SAVE_PARAMETERS=true +LOG_LEVEL=INFO +``` + +### Environment Variable Reference + +| Variable | Description | Default | Example | +|----------|-------------|---------|---------| +| `OPENAI_API_KEY` | **Required** - Your OpenAI API key | - | `sk-xxxxx` | +| `INPUT_PATH` | Directory for source images | `./input_images` | `./my_images` | +| `GENERATED_IMAGES_PATH` | Directory for output files | `./generated_images` | `./results` | +| `OUTPUT_FILENAME_PREFIX` | Prefix for output files | `gptimage1` | `my_edit` | +| `MAX_IMAGE_SIZE_MB` | Auto-optimize threshold | `4` | `1-10` | +| `DEFAULT_TIMEOUT` | API request timeout (seconds) | `30` | `60` | +| `ENABLE_AUTO_OPTIMIZE` | Auto WebP conversion | `true` | `true/false` | +| `SAVE_ORIGINALS` | Copy input images to output | `true` | `true/false` | +| `SAVE_PARAMETERS` | Save JSON parameters | `true` | `true/false` | +| `LOG_LEVEL` | Logging level | `INFO` | `DEBUG`, `WARNING` | + +## 📁 File Structure + +After running, GPTEdit creates this structure: + +``` +gptedit/ +├── input_images/ # Source images (INPUT_PATH) +│ ├── photo.jpg +│ ├── portrait.png +│ └── mask.png +├── generated_images/ # All output files (GENERATED_IMAGES_PATH) +│ ├── gptimage1_20250824_143022_000.png # Original (copied from input) +│ ├── gptimage1_20250824_143022_001.png # Edited output +│ └── gptimage1_20250824_143022_001.json # Edit parameters +├── temp/ # Temporary files (auto-cleaned) +└── gptedit.log # Debug log +``` + +## 🎯 Usage Examples in Claude + +### 
Method 1: File-based editing (Recommended) +``` +I placed photo.jpg in the input_images folder. +Can you edit it using edit_image_from_file to make it more vibrant? +``` + +### Method 2: Direct upload editing +``` +I have an image I'd like to edit. Can you make it more colorful and vibrant? +[Upload image to Claude] +``` + +### Method 3: Mask-based editing +``` +I have photo.jpg and mask.png in input_images/. +Can you use edit_with_mask_from_file to replace only the background? +``` + +## 🔍 Troubleshooting + +### Common Issues and Solutions + +#### "Server disconnected" +1. Check Python is installed: `python --version` +2. Verify dependencies: `pip list | grep mcp` +3. Check `.env` file exists with API key +4. Look at `gptedit.log` for errors + +#### "API key not found" +1. Ensure `.env` file is in project root (same folder as `main.py`) +2. Check API key format: `OPENAI_API_KEY=sk-xxxxx` +3. No quotes needed around the key + +#### "Method not found" +1. Update to latest code: `git pull` +2. Reinstall dependencies: `pip install -r requirements.txt` +3. Restart Claude Desktop completely + +#### "Image too large" +- Enable auto-optimization: `ENABLE_AUTO_OPTIMIZE=true` +- Or increase limit: `MAX_IMAGE_SIZE_MB=8` + +#### "Cannot find output images" +- Check `generated_images/` folder +- Files named: `gptimage1_{seed}_{date}_{time}_{number}.png` +- Look for most recent by timestamp + +### Verify Installation + +Test the setup: +```bash +python main.py +``` + +You should see: +``` +Starting GPTEdit MCP Server +GPTEdit MCP Server is running... +Ready to process image editing requests +``` + +Press `Ctrl+C` to stop. + +## 🛡️ Security Best Practices + +1. **API Key Management** + - Store API key only in `.env` file + - Never commit `.env` to version control + - Add `.env` to `.gitignore` + +2. **File Permissions** + - Ensure `generated_images/` is writable + - Keep sensitive files in project directory only + +3. 
**Logging** + - Use `INFO` level for normal operation + - `DEBUG` only for troubleshooting + - Rotate logs periodically + +## 📚 Additional Resources + +- [OpenAI API Documentation](https://platform.openai.com/docs) +- [MCP Protocol Specification](https://modelcontextprotocol.io) +- [Project README](README.md) +- [Technical Specifications](TECHNICAL_SPECS.md) + +## 💡 Tips + +1. **Performance** + - Smaller images (512x512) process faster + - Batch editing is more efficient than individual edits + - WebP format reduces file sizes significantly + +2. **Quality** + - Use clear, specific prompts + - Provide masks for precise edits + - Save parameters for reproducibility + +3. **Organization** + - Files are grouped by session (same seed) + - JSON parameters allow replay of edits + - Timestamps help track edit history + +## 🆘 Support + +For issues: +1. Check `gptedit.log` for detailed errors +2. Verify OpenAI API key has credits +3. Ensure all paths are accessible +4. Create an issue on GitHub + +--- + +Happy editing with GPTEdit! 
🎨 diff --git a/TECHNICAL_SPECS.md b/TECHNICAL_SPECS.md new file mode 100644 index 0000000..1c1701f --- /dev/null +++ b/TECHNICAL_SPECS.md @@ -0,0 +1,207 @@ +# GPTEdit Technical Specifications + +## OpenAI API 설정 + +### 고정 파라미터 (변경 불가) +OpenAI API 요구사항에 따라 다음 파라미터들은 고정값입니다: + +```python +MODEL = "gpt-image-1" # OpenAI 이미지 편집 모델 +INPUT_FIDELITY = "high" # 입력 이미지 충실도 +QUALITY = "high" # 출력 품질 +NUMBER_OF_IMAGES = 1 # 생성 이미지 수 (편집은 1개만 지원) +OUTPUT_FORMAT = "png" # 출력 형식 +PARTIAL_IMAGES = 0 # 부분 이미지 (미지원) +RESPONSE_FORMAT = "b64_json" # API 응답 형식 +``` + +### 지원 이미지 크기 +OpenAI API가 지원하는 출력 크기: +- `256x256` - 가장 빠름, 토큰 제한 낮음 +- `512x512` - 균형잡힌 선택 +- `1024x1024` - 고품질 (기본값) +- `1024x1536` - 세로형 고품질 +- `1536x1024` - 가로형 고품질 + +### 토큰 제한 +프롬프트 길이는 이미지 크기에 따라 제한됩니다: +- `256x256`: ~1000 토큰 +- `512x512`: ~1000 토큰 +- `1024x1024`: ~1000 토큰 +- `1024x1536`, `1536x1024`: ~750 토큰 + +## 이미지 처리 사양 + +### 입력 이미지 +- **지원 형식**: PNG, JPEG, WebP, GIF, BMP +- **최대 크기**: 4MB (자동 최적화 가능) +- **최대 해상도**: 제한 없음 (자동 리사이징) +- **알파 채널**: PNG 투명도 지원 + +### 자동 최적화 +4MB 이상 이미지는 자동으로 최적화됩니다: +1. WebP 압축 시도 (최고 품질 유지) +2. PNG 최적화 (WebP 실패 시) +3. 단계적 품질 감소 (95% → 85% → 75%) +4. 
최종 수단: 해상도 축소 + +### 마스크 이미지 +- **형식**: PNG 권장 (흑백) +- **크기**: 입력 이미지와 동일할 필요 없음 (자동 조정) +- **색상**: 흰색(255) = 편집 영역, 검정색(0) = 보존 영역 + +## API 제한사항 + +### Rate Limits +- **분당 요청**: 50 requests/min (Tier 2) +- **일일 요청**: 제한 없음 (크레딧 기반) +- **동시 요청**: 최대 5개 + +### 배치 처리 +- **최대 배치 크기**: 16개 이미지 +- **동시 처리**: asyncio 기반 병렬 처리 +- **실패 처리**: 개별 실패는 전체 배치에 영향 없음 + +## 파일 시스템 사양 + +### 디렉토리 구조 +``` +project_root/ +├── input_images/ # INPUT_PATH - 편집할 원본 이미지 +│ ├── photo.jpg +│ └── mask.png +├── generated_images/ # GENERATED_IMAGES_PATH - 편집 결과 +│ ├── gptimage1_20250824_143022_000.png # 원본 복사본 +│ ├── gptimage1_20250824_143022_001.png # 편집 결과 +│ └── gptimage1_20250824_143022_001.json # 메타데이터 +└── temp/ # 임시 파일 (자동 정리) +``` + +### 파일명 구조 +``` +gptimage1_{yyyymmdd}_{hhmmss}_{number}.{ext} +``` + +- `gptimage1`: 구성 가능한 prefix +- `{yyyymmdd}`: 날짜 (예: 20250824) +- `{hhmmss}`: 시간 (예: 143022) +- `{number}`: + - `000`: 원본 복사본 (입력 파일) + - `001-999`: 편집 결과 파일 +- `{ext}`: 파일 확장자 + +### JSON 파라미터 구조 +```json +{ + "base_name": "gptimage1_20250824_143022", + "timestamp": "2025-08-24T14:30:22.123456", + "prompt": "사용자 프롬프트", + "background": "transparent|opaque", + "input_image_name": "photo.jpg", + "input_temp_path": "/path/to/temp/photo.jpg", + "input_generated_path": "/path/to/generated/gptimage1_20250824_143022_000.png", + "input_size": [width, height], + "output_size": [width, height], + "execution_time": 3.45, + "optimization": { + "optimized": true, + "original_size_mb": 5.2, + "final_size_mb": 3.8, + "format_used": "PNG|WEBP", + "method": "압축 방법" + }, + "token_stats": { + "estimated_tokens": 450, + "token_limit": 1000, + "usage_percentage": 45.0 + }, + "config": { + "model": "gpt-image-1", + "quality": "high", + "api_version": "gpt-image-1" + } +} +``` + +## 성능 사양 + +### 처리 시간 +- **일반 편집**: 2-5초 +- **최적화 포함**: 3-7초 +- **배치 처리**: 병렬 처리로 선형 증가 방지 + +### 메모리 사용 +- **기본**: ~100MB +- **대용량 이미지**: 최대 500MB +- **배치 처리**: 이미지당 ~50MB 추가 + +## 에러 처리 + +### 에러 타입 분류 +```python +class 
EditErrorType(Enum): + QUOTA_EXCEEDED = "quota_exceeded" # API 한도 초과 + INVALID_IMAGE = "invalid_image" # 잘못된 이미지 + AUTHENTICATION = "authentication" # 인증 실패 + TIMEOUT = "timeout" # 시간 초과 + NETWORK = "network" # 네트워크 오류 + SERVICE_UNAVAILABLE = "service_unavailable" # 서비스 불가 + INVALID_REQUEST = "invalid_request" # 잘못된 요청 + TOKEN_LIMIT_EXCEEDED = "token_limit_exceeded" # 토큰 초과 + UNKNOWN = "unknown" # 알 수 없는 오류 +``` + +### 자동 복구 +- **타임아웃**: 30초 후 자동 재시도 +- **네트워크 오류**: 3회 재시도 (지수 백오프) +- **이미지 최적화**: 실패 시 다른 형식 시도 + +## 보안 사양 + +### API 키 관리 +- `.env` 파일에만 저장 +- 로그에 마스킹 처리 (마지막 4자리만 표시) +- 환경 변수 우선순위 적용 + +### 파일 접근 +- 지정된 디렉토리만 접근 가능 +- 심볼릭 링크 따라가지 않음 +- 파일 권한 검증 + +## 호환성 + +### Python 버전 +- **최소**: Python 3.8 +- **권장**: Python 3.10+ +- **테스트**: Python 3.11, 3.12 + +### 운영체제 +- **Windows**: 10, 11 (테스트 완료) +- **macOS**: 12+ (Monterey 이상) +- **Linux**: Ubuntu 20.04+ (테스트 완료) + +### 의존성 +``` +openai>=1.51.0 # OpenAI API 클라이언트 +mcp>=0.1.0 # Model Context Protocol +pillow>=10.0.0 # 이미지 처리 +python-dotenv>=1.0.0 # 환경 변수 관리 +aiofiles>=23.0.0 # 비동기 파일 I/O +``` + +## 확장 가능성 + +### 플러그인 시스템 +향후 플러그인 시스템 추가 예정: +- 커스텀 이미지 필터 +- 전처리/후처리 파이프라인 +- 외부 서비스 통합 + +### API 버전 관리 +- 현재: `gpt-image-1` +- 향후: 새 모델 출시 시 자동 감지 및 전환 + +--- + +이 문서는 GPTEdit의 기술적 세부사항을 담고 있습니다. +개발 시 참고하시기 바랍니다. diff --git a/WORKFLOW.md b/WORKFLOW.md new file mode 100644 index 0000000..29ed476 --- /dev/null +++ b/WORKFLOW.md @@ -0,0 +1,190 @@ +# GPTEdit MCP Server - Directory-based Processing Workflow + +## 개요 +이 문서는 GPTEdit MCP 서버의 INPUT_PATH 기반 이미지 편집 및 처리 워크플로우를 설명합니다. + +## 워크플로우 + +### 1. File-based Edit Processing (권장) +``` +Client -> MCP Server: edit_image_from_file tool +├── input_image_name: "photo.jpg" +├── prompt: "edit description" +└── MCP Server Process: + ├── 1. INPUT_PATH/photo.jpg 읽기 + ├── 2. base64 변환 + ├── 3. 이미지 편집 (OpenAI API) + ├── 4. GENERATED_IMAGES_PATH에 저장: + │ ├── {base_name}_000.png (원본 복사) + │ └── {base_name}_001.png (편집본) + └── 5. 응답: 편집된 이미지 + 메타데이터 +``` + +### 2. 
Mask-based Edit Processing (권장) +``` +Client -> MCP Server: edit_with_mask_from_file tool +├── input_image_name: "photo.jpg" +├── mask_image_name: "mask.png" +├── prompt: "edit description" +└── MCP Server Process: + ├── 1. INPUT_PATH에서 파일들 읽기: + │ ├── INPUT_PATH/photo.jpg + │ └── INPUT_PATH/mask.png + ├── 2. base64 변환 + ├── 3. 마스크 이미지 편집 (OpenAI API) + ├── 4. GENERATED_IMAGES_PATH에 저장: + │ ├── {base_name}_000.png (원본) + │ ├── {base_name}_mask_000.png (마스크) + │ └── {base_name}_001.png (편집본) + └── 5. 응답: 편집된 이미지 + 메타데이터 +``` + +### 3. Direct Upload Processing (선택사항) +``` +Client -> MCP Server: edit_image tool +├── input_image_b64: base64 encoded image +├── prompt: "edit description" +└── MCP Server Process: + ├── 1. base64 데이터 처리 + ├── 2. 임시 파일 생성 + ├── 3. 이미지 편집 (OpenAI API) + ├── 4. GENERATED_IMAGES_PATH에 저장 + └── 5. 응답: 편집된 이미지 + 메타데이터 +``` + +### 4. Directory Structure +``` +Input Directory (INPUT_PATH): +├── photo.jpg # 원본 이미지 +├── portrait.png # 원본 이미지 +└── mask.png # 마스크 이미지 + +Temp Directory (temp/): +├── temp_image_1234.png # 임시 처리 파일들 +└── [기타 임시 파일들] + +Output Directory (GENERATED_IMAGES_PATH): +├── gptimage1_20250824_143022_000.png # 원본 복사본 +├── gptimage1_20250824_143022_001.png # 편집된 이미지 +├── gptimage1_20250824_143022_001.json # 메타데이터 +└── [기타 결과 파일들] +``` + +## 주요 구성 요소 + +### 1. File-based Edit Tools +- **edit_image_from_file**: INPUT_PATH에서 파일명 기반 편집 +- **edit_with_mask_from_file**: 마스크를 사용한 파일명 기반 편집 + +### 2. Direct Upload Tools +- **edit_image**: Base64 데이터 직접 편집 +- **edit_with_mask**: Base64 데이터와 마스크 편집 + +### 3. Utility Tools +- **move_temp_to_output**: 임시에서 출력 디렉토리로 파일 이동 +- **validate_image**: 이미지 파일 유효성 검증 +- **create_mask_from_alpha**: PNG 알파 채널에서 마스크 생성 + +### 4. 
Directory Configuration +``` +project_root/ +├── input_images/ # INPUT_PATH (원본 이미지) +│ ├── photo.jpg +│ └── mask.png +├── generated_images/ # GENERATED_IMAGES_PATH (출력 파일) +│ ├── gptimage1_*_000.png # 원본 복사 +│ ├── gptimage1_*_001.png # 편집된 이미지 +│ └── gptimage1_*_001.json # 메타데이터 +└── temp/ # 임시 파일 (자동 정리) +``` + +## API 사용 예시 + +### 1. File-based Edit (추천) +```json +{ + "method": "tools/call", + "params": { + "name": "edit_image_from_file", + "arguments": { + "input_image_name": "photo.jpg", + "prompt": "Add a sunset background", + "background": "transparent", + "save_to_file": true + } + } +} +``` + +### 2. Mask-based File Edit (추천) +```json +{ + "method": "tools/call", + "params": { + "name": "edit_with_mask_from_file", + "arguments": { + "input_image_name": "portrait.jpg", + "mask_image_name": "face_mask.png", + "prompt": "Change hair color to blonde", + "background": "transparent" + } + } +} +``` + +### 3. Direct Upload Edit +```json +{ + "method": "tools/call", + "params": { + "name": "edit_image", + "arguments": { + "input_image_b64": "iVBORw0KGgoAAAANSUhEUgAA...", + "prompt": "Add a sunset background", + "background": "transparent" + } + } +} +``` + +## 특징 + +### 1. 자동 파일 관리 +- **임시 디렉토리**: 업로드된 파일의 일시적 저장 +- **출력 디렉토리**: 편집된 결과의 영구 저장 +- **자동 정리**: 선택적 파일 이동/복사 + +### 2. 유연한 파일명 처리 +- 업로드시 파일명 지정 가능 +- 자동 파일명 생성 (타임스탬프 기반) +- 파일 확장자 자동 감지 + +### 3. 오류 처리 +- 파일 존재 확인 +- 권한 오류 처리 +- 디렉토리 자동 생성 + +## 설정 + +### Environment Variables +```bash +INPUT_PATH=./input_images # 원본 이미지 디렉토리 +GENERATED_IMAGES_PATH=./generated_images # 출력 디렉토리 +SAVE_PARAMETERS=true # 메타데이터 저장 +SAVE_ORIGINALS=true # 원본 복사 저장 +``` + +### Default Paths +```python +INPUT_PATH = project_root/input_images +GENERATED_IMAGES_PATH = project_root/generated_images +TEMP_PATH = project_root/temp +``` + +### 사용 전 준비 +1. **디렉토리 생성**: 자동으로 input_images/, generated_images/ 디렉토리 생성 +2. **이미지 배치**: 편집할 이미지를 input_images/에 배치 +3. **MCP 호출**: edit_image_from_file 또는 edit_with_mask_from_file 사용 +4. 
#!/usr/bin/env python3
"""
Clean up script for GPTEdit project
Removes unnecessary files safely
"""

import shutil
from pathlib import Path


def cleanup_project(root_dir):
    """Remove leftover debug, backup, temp and cache files from the project.

    Args:
        root_dir: Project root directory (str or Path) to clean.
    """
    root_path = Path(root_dir)

    print(f"🧹 Cleaning up project: {root_path}")

    # Files and directories that are known-safe to delete.
    cleanup_targets = [
        # Backup files
        "src/connector/openai_client_backup.py.disabled",
        "tests/image_utils_backup.py",

        # Debug and test files in root
        "debug_gptedit.py",
        "debug_path.py",
        "quick_test.py",
        "replay_edit.py",
        "test_api_key.py",
        "test_size_optimization.py",
        "test_verification.py",
        "clear_cache.py",
        "search_response_format.py",

        # Temporary files
        "temp/fairy_image.png",
        "temp/imagen4.png",
        "temp/optimized_imagen4.png",

        # Python cache directories
        "src/__pycache__",
        "src/connector/__pycache__",
        "src/server/__pycache__",
        "src/utils/__pycache__",
    ]

    removed_count = 0

    for target in cleanup_targets:
        target_path = root_path / target

        if not target_path.exists():
            print(f"⚠️ Not found: {target}")
            continue

        try:
            if target_path.is_dir():
                shutil.rmtree(target_path)
                print(f"✅ Removed directory: {target}")
            else:
                target_path.unlink()
                print(f"✅ Removed file: {target}")
            # BUG FIX: count every successful removal. Previously the counter
            # was incremented only in the file branch, so removed directories
            # were never reflected in the final summary.
            removed_count += 1
        except Exception as e:
            print(f"❌ Failed to remove {target}: {e}")

    print(f"\n🎉 Cleanup complete! Removed {removed_count} items")

    # Sanity-check that the essential project layout is still intact.
    print(f"\n📁 Remaining project structure:")
    essential_files = [
        "main.py",
        "requirements.txt",
        ".env",
        ".env.example",
        "README.md",
        "src/",
        "input_images/",
        "generated_images/",
        "tests/"
    ]

    for item in essential_files:
        item_path = root_path / item
        if item_path.exists():
            print(f"   ✅ {item}")
        else:
            print(f"   ❌ {item} (missing)")


if __name__ == "__main__":
    script_dir = Path(__file__).parent

    print("=" * 60)
    print("GPTEdit Project Cleanup")
    print("=" * 60)

    cleanup_project(script_dir)

    print("\n" + "=" * 60)
    print("✅ PROJECT CLEANUP COMPLETED!")
    print("Ready to restart the MCP server")
    print("=" * 60)
print("=" * 50) + + clear_python_cache(script_dir) + + print("\n" + "=" * 50) + print("✅ CACHE CLEARED SUCCESSFULLY!") + print("Please restart your MCP server/application") + print("=" * 50) diff --git a/debug_gptedit.py b/debug_gptedit.py new file mode 100644 index 0000000..dbbcf4c --- /dev/null +++ b/debug_gptedit.py @@ -0,0 +1 @@ +# REMOVED: Debug file cleaned up during project organization diff --git a/debug_path.py b/debug_path.py new file mode 100644 index 0000000..dbbcf4c --- /dev/null +++ b/debug_path.py @@ -0,0 +1 @@ +# REMOVED: Debug file cleaned up during project organization diff --git a/final_cleanup.bat b/final_cleanup.bat new file mode 100644 index 0000000..001be81 --- /dev/null +++ b/final_cleanup.bat @@ -0,0 +1,29 @@ +@echo off +echo ========================================== +echo GPTEdit Project Final Cleanup +echo ========================================== + +echo 🧹 Removing Python cache files... +if exist "src\__pycache__" rmdir /s /q "src\__pycache__" +if exist "src\connector\__pycache__" rmdir /s /q "src\connector\__pycache__" +if exist "src\server\__pycache__" rmdir /s /q "src\server\__pycache__" +if exist "src\utils\__pycache__" rmdir /s /q "src\utils\__pycache__" + +echo 🗑️ Removing temporary cleanup files... +if exist "clear_cache.py" del "clear_cache.py" +if exist "search_response_format.py" del "search_response_format.py" +if exist "cleanup_project.py" del "cleanup_project.py" +if exist "temp_delete_marker.txt" del "temp_delete_marker.txt" + +echo 🔄 Cleaning old temp files... +if exist "temp\fairy_image.png" del "temp\fairy_image.png" + +echo ✅ Cleanup completed! +echo. +echo 🚀 Next steps: +echo 1. Restart your MCP server completely +echo 2. Test image editing functionality +echo 3. The response_format error should be resolved +echo. 
# GPTEdit - OpenAI Image Editing MCP Server

import asyncio
import logging
import sys
from pathlib import Path

# Add project root to path so `src.*` imports resolve when run as a script
sys.path.insert(0, str(Path(__file__).parent))

from src.connector.config import Config
from src.server.mcp_server import GPTEditMCPServer

# Log to a file plus stderr: stdout is reserved for JSON-RPC traffic,
# so nothing may be written there except protocol messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('gptedit.log', encoding='utf-8'),
        logging.StreamHandler(sys.stderr)
    ]
)

logger = logging.getLogger(__name__)


async def main():
    """Run the GPTEdit MCP server over a stdio transport.

    Returns:
        int: Process exit code (0 on clean shutdown, 1 on error).
    """
    try:
        logger.info("=" * 60)
        logger.info("Starting GPTEdit MCP Server")
        logger.info("=" * 60)

        # Load and validate configuration before touching the transport.
        config = Config()
        if not config.validate():
            logger.error("Configuration validation failed")
            return 1

        # Create and start server
        mcp_server = GPTEditMCPServer(config)
        server = mcp_server.get_server()

        logger.info("GPTEdit MCP Server is running...")
        logger.info("Ready to process image editing requests")
        # Plain string literal: the original used an f-string with no fields.
        logger.info("Available tools: edit_image, edit_with_mask, batch_edit, "
                    "validate_image, create_mask_from_alpha")

        # Imported lazily so logging is configured before mcp touches stdio.
        from mcp.server.stdio import stdio_server

        async with stdio_server() as (read_stream, write_stream):
            try:
                await server.run(
                    read_stream,
                    write_stream,
                    server.create_initialization_options()
                )
            except Exception as e:
                logger.error(f"Server error: {e}", exc_info=True)
                raise

        # BUG FIX: return an explicit success code. Previously this path fell
        # through returning None, relying on sys.exit(None) exiting with 0.
        return 0

    except KeyboardInterrupt:
        logger.info("Server shutdown requested")
        return 0
    except Exception as e:
        logger.error(f"Fatal error: {e}", exc_info=True)
        return 1


if __name__ == "__main__":
    # Ensure a clean, explicit exit code even on unhandled exceptions.
    try:
        exit_code = asyncio.run(main())
    except Exception as e:
        logger.error(f"Unhandled exception: {e}", exc_info=True)
        exit_code = 1

    sys.exit(exit_code)
+ if exist ".env.example" ( + copy ".env.example" ".env" + echo Please edit .env file with your OPENAI_API_KEY + pause + ) else ( + echo ERROR: .env.example not found + pause + exit /b 1 + ) +) + +REM Check if virtual environment exists +if exist "venv\Scripts\activate.bat" ( + echo Using virtual environment... + call venv\Scripts\activate.bat +) else ( + echo No virtual environment found, using system Python +) + +REM Install/update dependencies if needed +echo Checking dependencies... +pip install -q -r requirements.txt 2>nul + +REM Create necessary directories +if not exist "generated_images" mkdir generated_images +if not exist "temp" mkdir temp + +REM Start the server +echo. +echo Starting GPTEdit MCP Server... +echo ---------------------------------------- +echo Output directory: generated_images\ +echo Log file: gptedit.log +echo ---------------------------------------- +echo. + +python main.py + +REM If server exits, show error +if errorlevel 1 ( + echo. + echo ERROR: Server exited with error code %errorlevel% + echo Check gptedit.log for details +) + +pause diff --git a/run.ps1 b/run.ps1 new file mode 100644 index 0000000..f5c6a96 --- /dev/null +++ b/run.ps1 @@ -0,0 +1,84 @@ +# GPTEdit MCP Server Launcher (PowerShell) +# This script starts the GPTEdit MCP server with proper environment setup + +Write-Host "========================================" -ForegroundColor Cyan +Write-Host "GPTEdit MCP Server Launcher" -ForegroundColor Cyan +Write-Host "========================================" -ForegroundColor Cyan +Write-Host "" + +# Change to script directory +Set-Location $PSScriptRoot + +# Check if Python is available +try { + $pythonVersion = python --version 2>&1 + Write-Host "Found: $pythonVersion" -ForegroundColor Green +} catch { + Write-Host "ERROR: Python is not installed or not in PATH" -ForegroundColor Red + Write-Host "Please install Python 3.8+ and add it to PATH" -ForegroundColor Yellow + Read-Host "Press Enter to exit" + exit 1 +} + +# Check if 
.env file exists +if (-not (Test-Path ".env")) { + Write-Host "WARNING: .env file not found" -ForegroundColor Yellow + if (Test-Path ".env.example") { + Write-Host "Creating .env from .env.example..." -ForegroundColor Yellow + Copy-Item ".env.example" ".env" + Write-Host "Please edit .env file with your OPENAI_API_KEY" -ForegroundColor Yellow + Read-Host "Press Enter to continue" + } else { + Write-Host "ERROR: .env.example not found" -ForegroundColor Red + Read-Host "Press Enter to exit" + exit 1 + } +} + +# Check if virtual environment exists +if (Test-Path "venv\Scripts\Activate.ps1") { + Write-Host "Using virtual environment..." -ForegroundColor Green + & "venv\Scripts\Activate.ps1" +} else { + Write-Host "No virtual environment found, using system Python" -ForegroundColor Yellow +} + +# Install/update dependencies +Write-Host "Checking dependencies..." -ForegroundColor Cyan +pip install -q -r requirements.txt 2>$null + +# Create necessary directories +if (-not (Test-Path "generated_images")) { + New-Item -ItemType Directory -Path "generated_images" | Out-Null + Write-Host "Created generated_images directory" -ForegroundColor Green +} + +if (-not (Test-Path "temp")) { + New-Item -ItemType Directory -Path "temp" | Out-Null + Write-Host "Created temp directory" -ForegroundColor Green +} + +# Display configuration +Write-Host "" +Write-Host "Starting GPTEdit MCP Server..." 
#!/usr/bin/env python3
"""
Search for response_format usage in all Python files
"""

from pathlib import Path


def search_response_format(root_dir):
    """Scan every ``.py`` file under *root_dir* for 'response_format'.

    The match is case-insensitive and reported per line.

    Args:
        root_dir: Directory (str or Path) to scan recursively.

    Returns:
        List of ``(path, matches)`` tuples, one per file containing the
        keyword, where ``matches`` is a list of ``(line_number, stripped_line)``
        pairs with 1-based line numbers.
    """
    root_path = Path(root_dir)

    print(f"🔍 Searching for 'response_format' in: {root_path}")

    found_files = []

    for py_file in root_path.rglob("*.py"):
        try:
            text = py_file.read_text(encoding='utf-8')
        except Exception as e:
            print(f"❌ Error reading {py_file}: {e}")
            continue

        # Collect every line mentioning the keyword, case-insensitively.
        matches = [
            (number, raw.strip())
            for number, raw in enumerate(text.split('\n'), 1)
            if 'response_format' in raw.lower()
        ]

        if matches:
            found_files.append((py_file, matches))
            print(f"\n📁 {py_file}")
            for number, stripped in matches:
                print(f"   Line {number}: {stripped}")

    if found_files:
        print(f"\n⚠️ Found 'response_format' in {len(found_files)} files")
    else:
        print("✅ No 'response_format' found in any Python files")

    return found_files


if __name__ == "__main__":
    script_dir = Path(__file__).parent
    print("=" * 60)
    print("GPTEdit response_format Search")
    print("=" * 60)

    found = search_response_format(script_dir)

    print("\n" + "=" * 60)
    if found:
        print("❌ ACTION REQUIRED: Remove response_format from found files")
    else:
        print("✅ NO ACTION NEEDED: response_format not found")
    print("=" * 60)
configuration""" + # Load environment variables + env_path = Path(__file__).parent.parent.parent / '.env' + if env_path.exists(): + load_dotenv(env_path) + logger.info(f"Loaded environment from {env_path}") + else: + logger.warning(f"No .env file found at {env_path}") + + # API Configuration + self.api_key = os.getenv('OPENAI_API_KEY', '') + self.organization = os.getenv('OPENAI_ORGANIZATION', '') + self.user = os.getenv('OPENAI_USER', '') + + # Server Configuration + self.log_level = os.getenv('LOG_LEVEL', 'INFO') + self.max_image_size_mb = int(os.getenv('MAX_IMAGE_SIZE_MB', '4')) + self.default_timeout = int(os.getenv('DEFAULT_TIMEOUT', '30')) + + # Feature Flags + self.enable_auto_mask = os.getenv('ENABLE_AUTO_MASK', 'false').lower() == 'true' + self.enable_auto_optimize = os.getenv('ENABLE_AUTO_OPTIMIZE', 'true').lower() == 'true' + + # Paths Configuration + self.base_path = Path(__file__).parent.parent.parent + + # Input directory for reading source images + default_input_dir = str(self.base_path / 'input_images') + self.input_path = Path(os.getenv('INPUT_PATH', default_input_dir)) + + # Single output directory for everything + default_output_dir = str(self.base_path / 'generated_images') + self.generated_images_path = Path(os.getenv('GENERATED_IMAGES_PATH', default_output_dir)) + + # File naming configuration + self.output_filename_prefix = os.getenv('OUTPUT_FILENAME_PREFIX', 'gptimage1') + self.save_originals = os.getenv('SAVE_ORIGINALS', 'true').lower() == 'true' + self.save_parameters = os.getenv('SAVE_PARAMETERS', 'true').lower() == 'true' + + # Ensure all required directories exist with proper error handling + self._ensure_directories() + + logger.info(f"Input path: {self.input_path}") + logger.info(f"Generated images path: {self.generated_images_path}") + + def _ensure_directories(self) -> None: + """ + Ensure all required directories exist with proper permissions and error handling + """ + directories = [ + ("input_images", self.input_path), + 
("generated_images", self.generated_images_path) + ] + + for dir_name, dir_path in directories: + try: + # Create directory with parents if needed + dir_path.mkdir(parents=True, exist_ok=True) + + # Verify directory is accessible + if not dir_path.exists(): + raise RuntimeError(f"Failed to create {dir_name} directory: {dir_path}") + + if not dir_path.is_dir(): + raise RuntimeError(f"{dir_name} path exists but is not a directory: {dir_path}") + + # Test write permissions by creating a temporary test file + test_file = dir_path / ".gptedit_test_write" + try: + test_file.touch() + test_file.unlink() # Delete test file + except PermissionError: + raise RuntimeError(f"No write permission for {dir_name} directory: {dir_path}") + except Exception as e: + logger.warning(f"Could not test write permissions for {dir_name} directory: {e}") + + logger.debug(f"✅ {dir_name.title()} directory ready: {dir_path}") + + except Exception as e: + logger.error(f"❌ Failed to setup {dir_name} directory ({dir_path}): {e}") + raise RuntimeError(f"Directory setup failed for {dir_name}: {e}") from e + + # Note: No longer managing local temp directory - using Claude temp directly + + def ensure_output_directory(self) -> None: + """ + Runtime method to ensure output directory exists (in case it gets deleted) + """ + try: + if not self.generated_images_path.exists(): + logger.warning(f"Output directory missing, recreating: {self.generated_images_path}") + self.generated_images_path.mkdir(parents=True, exist_ok=True) + + # Verify creation was successful + if not self.generated_images_path.exists(): + raise RuntimeError(f"Failed to recreate output directory: {self.generated_images_path}") + + logger.info(f"✅ Output directory recreated: {self.generated_images_path}") + + except Exception as e: + logger.error(f"❌ Failed to ensure output directory: {e}") + raise + + def generate_base_name_simple(self) -> str: + """ + Generate simple base name in format: gptimage1_{yyyymmdd}_{hhmmss} + (without seed 
for cleaner naming) + + Returns: + str: Base name for files + """ + now = datetime.now() + date_str = now.strftime("%Y%m%d") + time_str = now.strftime("%H%M%S") + + return f"{self.output_filename_prefix}_{date_str}_{time_str}" + + def generate_base_name(self, seed: Optional[int] = None) -> str: + """ + Generate base name with seed for files + + Args: + seed: Optional seed value, generated if None + + Returns: + str: Base name for files + """ + if seed is None: + seed = random.randint(0, 999999) + + now = datetime.now() + date_str = now.strftime("%Y%m%d") + time_str = now.strftime("%H%M%S") + + return f"{self.output_filename_prefix}_{seed}_{date_str}_{time_str}" + + def generate_filename(self, base_name: str, file_number: int = 1, extension: str = 'png') -> str: + """ + Generate filename from base name: + - Input: gptimage1_{seed}_{yyyymmdd}_{hhmmss}_000.{ext} + - Output: gptimage1_{seed}_{yyyymmdd}_{hhmmss}_001.png + - JSON: gptimage1_{seed}_{yyyymmdd}_{hhmmss}_001.json + + Args: + base_name: Base name (e.g., gptimage1_123456_20250824_143022) + file_number: File number (0 for input, 1+ for outputs) + extension: File extension + + Returns: + str: Generated filename + """ + return f"{base_name}_{file_number:03d}.{extension}" + + def get_output_path(self, base_name: str, file_number: int = 1, extension: str = 'png') -> Path: + """ + Get full path for output file with directory verification + + Args: + base_name: Base name for the file + file_number: File number (0 for input, 1+ for outputs) + extension: File extension + + Returns: + Path: Full path to the file + """ + # Ensure output directory exists before returning path + self.ensure_output_directory() + + filename = self.generate_filename(base_name, file_number, extension) + return self.generated_images_path / filename + + def parse_base_name(self, filename: str) -> Optional[str]: + """ + Extract base name from a filename + + Args: + filename: Filename to parse + + Returns: + Base name if valid format, None 
otherwise + """ + path = Path(filename) + stem = path.stem + + # Check if it matches our pattern: {prefix}_{seed}_{date}_{time}_{number} + parts = stem.split('_') + if len(parts) >= 5: + # Reconstruct base name without the file number + return '_'.join(parts[:-1]) + return None + + def find_input_file(self, base_name: str) -> Optional[Path]: + """ + Find input file with given base name + + Args: + base_name: Base name to search for (e.g., gptimage1_123456_20250824_143022) + + Returns: + Path to input file if found, None otherwise + """ + # Try common image extensions + for ext in ['png', 'jpg', 'jpeg', 'webp', 'gif', 'bmp']: + input_path = self.generated_images_path / f"{base_name}_000.{ext}" + if input_path.exists(): + return input_path + return None + + def find_latest_output(self, base_name: str) -> Tuple[Optional[Path], int]: + """ + Find the latest output file for a base name and return next available number + + Args: + base_name: Base name to search for + + Returns: + Tuple of (latest output path or None, next available number) + """ + max_number = 0 + latest_path = None + + # Search for existing outputs + pattern = f"{base_name}_*.png" + for file_path in self.generated_images_path.glob(pattern): + stem = file_path.stem + try: + # Extract number from filename + number = int(stem.split('_')[-1]) + if number > max_number and number > 0: # Skip 000 (input) + max_number = number + latest_path = file_path + except (ValueError, IndexError): + continue + + return latest_path, max_number + 1 + + def validate(self) -> bool: + """ + Validate configuration + + Returns: + bool: True if configuration is valid + """ + if not self.api_key: + logger.error("OPENAI_API_KEY is not set") + return False + + if not self.api_key.startswith('sk-'): + logger.warning("API key doesn't start with 'sk-', might be invalid") + + if self.max_image_size_mb <= 0 or self.max_image_size_mb > 10: + logger.error(f"Invalid MAX_IMAGE_SIZE_MB: {self.max_image_size_mb} (must be 1-10)") + return False 
+ + if self.default_timeout <= 0: + logger.error(f"Invalid DEFAULT_TIMEOUT: {self.default_timeout}") + return False + + logger.info("Configuration validated successfully") + return True + + def get_max_image_size_bytes(self) -> int: + """Get maximum image size in bytes""" + return self.max_image_size_mb * 1024 * 1024 + + def get_optimal_size(self, width: int, height: int) -> str: + """ + Get optimal size based on input image dimensions + + Args: + width: Input image width + height: Input image height + + Returns: + str: Optimal size string (e.g., "1024x1024") + """ + from ..utils.token_utils import determine_optimal_size_for_aspect_ratio + size, aspect_type = determine_optimal_size_for_aspect_ratio(width, height) + return size + + def get_token_limit(self, size: str = "1024x1024") -> int: + """ + Get token limit for current quality and size settings + + Args: + size: Image size + + Returns: + int: Token limit + """ + from ..utils.token_utils import get_token_limit_for_size + return get_token_limit_for_size(size, self.QUALITY) + + def __str__(self) -> str: + """String representation""" + return ( + f"GPTEdit Configuration:\n" + f" API Key: {'***' + self.api_key[-4:] if self.api_key else 'Not Set'}\n" + f" Organization: {self.organization or 'Not Set'}\n" + f" Max Image Size: {self.max_image_size_mb}MB\n" + f" Timeout: {self.default_timeout}s\n" + f" Auto Mask: {self.enable_auto_mask}\n" + f" Auto Optimize: {self.enable_auto_optimize}\n" + f" Input Directory: {self.input_path}\n" + f" Output Directory: {self.generated_images_path}\n" + f" Save Parameters: {self.save_parameters}" + ) diff --git a/src/connector/openai_client.py b/src/connector/openai_client.py new file mode 100644 index 0000000..00aee9e --- /dev/null +++ b/src/connector/openai_client.py @@ -0,0 +1,668 @@ +"""Enhanced OpenAI Client with automatic image optimization and retry logic""" + +import asyncio +import base64 +import logging +import time +import re +from typing import List, Optional, Dict, Any, 
class EditErrorType(Enum):
    """API error type classification"""
    QUOTA_EXCEEDED = "quota_exceeded"
    INVALID_IMAGE = "invalid_image"
    AUTHENTICATION = "authentication"
    TIMEOUT = "timeout"
    NETWORK = "network"
    SERVICE_UNAVAILABLE = "service_unavailable"
    INVALID_REQUEST = "invalid_request"
    TOKEN_LIMIT_EXCEEDED = "token_limit_exceeded"
    UNKNOWN = "unknown"


def classify_api_error(error: Exception) -> Tuple[EditErrorType, str]:
    """
    Classify API errors and return user-friendly messages

    Args:
        error: Exception that occurred

    Returns:
        tuple: (error type, user message)
    """
    error_str = str(error).lower()

    # Typed OpenAI exceptions take precedence over string matching.
    if isinstance(error, openai.RateLimitError):
        return EditErrorType.QUOTA_EXCEEDED, "API rate limit exceeded. Please try again later."

    if isinstance(error, openai.AuthenticationError):
        return EditErrorType.AUTHENTICATION, "Authentication failed. Please check your API key."

    if isinstance(error, openai.APITimeoutError):
        return EditErrorType.TIMEOUT, "API request timed out. This may be due to a large image or high server load. Try with a smaller image or retry later."

    if isinstance(error, openai.APIConnectionError):
        return EditErrorType.NETWORK, "Network connection error. Please check your internet connection."

    if isinstance(error, openai.BadRequestError):
        # Distinguish prompt-too-long from other bad requests.
        if "token" in error_str:
            return EditErrorType.TOKEN_LIMIT_EXCEEDED, f"Prompt exceeds token limit: {str(error)}"
        return EditErrorType.INVALID_REQUEST, f"Invalid request: {str(error)}"

    if isinstance(error, openai.InternalServerError):
        return EditErrorType.SERVICE_UNAVAILABLE, "OpenAI service error. Please try again later."

    # Fall back to keyword heuristics on the message text.
    if any(keyword in error_str for keyword in ['quota', 'limit', 'exceeded']):
        if 'token' in error_str:
            return EditErrorType.TOKEN_LIMIT_EXCEEDED, "Prompt exceeds token limit. Please shorten your prompt."
        return EditErrorType.QUOTA_EXCEEDED, "API usage limit reached. Please try again later."

    if any(keyword in error_str for keyword in ['invalid', 'image', 'format']):
        return EditErrorType.INVALID_IMAGE, "Invalid image format or size. Please check your input."

    if any(keyword in error_str for keyword in ['timeout', 'timed out']):
        return EditErrorType.TIMEOUT, "Request timed out. Please try again."

    return EditErrorType.UNKNOWN, f"Unexpected error: {str(error)}"


def parse_rate_limit_reset_time(error_message: str) -> Optional[int]:
    """
    Parse rate limit reset time from error message

    Args:
        error_message: Error message from API

    Returns:
        Optional[int]: Reset time in seconds, None if not found

    Fix: the combined minutes+seconds pattern is now tried BEFORE the
    bare-minutes pattern. Previously r'reset in (\\d+)m' matched first,
    so "reset in 1m 30s" returned 60 and silently dropped the seconds.
    """
    error_lower = error_message.lower()

    # (pattern, converter) pairs; most specific patterns first.
    rules = [
        (r'try again in (\d+)s', lambda m: int(m.group(1))),
        (r'reset in (\d+)m\s+(\d+)s', lambda m: int(m.group(1)) * 60 + int(m.group(2))),
        (r'reset in (\d+)s', lambda m: int(m.group(1))),
        (r'reset in (\d+)m', lambda m: int(m.group(1)) * 60),
        (r'retry after (\d+) seconds?', lambda m: int(m.group(1))),
    ]

    for pattern, convert in rules:
        match = re.search(pattern, error_lower)
        if match:
            return convert(match)

    return None
@dataclass
class RetryConfig:
    """Tuning knobs for the custom retry loop."""
    max_retries: int = 2              # Reduced retries for faster failure
    base_delay: float = 5.0           # Increased base delay (seconds)
    max_delay: float = 60.0           # Reduced max delay (1 minute)
    exponential_backoff: bool = True  # Double the delay on each attempt
    retry_on_quota: bool = True       # Whether to retry on quota exceeded
    quota_wait_threshold: int = 60    # Reduced quota wait threshold (seconds)


@dataclass
class ImageEditResponse:
    """Result envelope for a single image-edit call."""
    edited_image_data: bytes                             # Raw image bytes (b'' on failure)
    request: ImageEditRequest                            # The originating request
    success: bool = True
    error_message: Optional[str] = None
    error_type: Optional[EditErrorType] = None
    execution_time: Optional[float] = None
    image_size: Optional[Tuple[int, int]] = None         # (width, height)
    token_stats: Optional[Dict[str, Any]] = None         # Token usage statistics
    optimization_info: Optional[Dict[str, Any]] = None   # Image optimization details
async def _prepare_image(self, image_path: str, auto_optimize: bool = True) -> Tuple[bytes, Dict[str, Any]]:
    """
    Prepare image for API request with automatic optimization

    Args:
        image_path: Path to image file
        auto_optimize: Whether to automatically optimize large images

    Returns:
        tuple: (image_data, optimization_info)
    """
    info: Dict[str, Any] = {
        "optimized": False,
        "original_size_mb": 0,
        "final_size_mb": 0,
        "format_used": "PNG",
        "method": None,
    }

    # Validate up front; a pure size overflow is handled by optimization below.
    is_valid, size_mb, error_msg = validate_image_file(image_path, self.config.max_image_size_mb)
    info["original_size_mb"] = size_mb
    if not is_valid and "exceeds" not in str(error_msg):
        # Real validation error (not just size)
        raise ValueError(f"Image validation failed: {error_msg}")

    original_dims = get_image_dimensions(image_path)

    if size_mb > self.config.max_image_size_mb:
        if not auto_optimize:
            raise ValueError(f"Image too large ({size_mb:.2f}MB) and auto-optimization is disabled")

        logger.info(f"🔄 Image size {size_mb:.2f}MB exceeds limit, optimizing...")
        # WebP generally compresses better than PNG for large inputs.
        png_data, format_used = convert_to_png_with_size_limit(
            image_path,
            max_size_mb=self.config.max_image_size_mb,
            prefer_webp=True,
        )
        final_size_mb = len(png_data) / (1024 * 1024)
        info.update(
            optimized=True,
            final_size_mb=final_size_mb,
            format_used=format_used,
            method="WebP compression" if format_used == "WEBP" else "PNG optimization",
            original_dimensions=original_dims,
        )
        reduction_pct = ((size_mb - final_size_mb) / size_mb) * 100
        logger.info(f"✅ Image optimized: {size_mb:.2f}MB → {final_size_mb:.2f}MB ({reduction_pct:.1f}% reduction)")
        logger.info(f" Format: {format_used}, Dimensions: {original_dims[0]}x{original_dims[1]}")
    else:
        # Small enough already: plain PNG conversion, no optimization pass.
        png_data = convert_to_png(image_path)
        info["final_size_mb"] = len(png_data) / (1024 * 1024)
        logger.info(f"Image prepared: {Path(image_path).name} ({size_mb:.2f}MB)")

    return png_data, info


def _should_retry(self, error_type: EditErrorType, attempt: int) -> bool:
    """
    Determine if an error should be retried

    Args:
        error_type: Type of error that occurred
        attempt: Current attempt number (0-based)

    Returns:
        bool: Whether to retry
    """
    if attempt >= self.retry_config.max_retries:
        return False
    # Transient failures are always retryable; quota errors only if configured.
    retryable = {EditErrorType.TIMEOUT, EditErrorType.NETWORK, EditErrorType.SERVICE_UNAVAILABLE}
    if self.retry_config.retry_on_quota:
        retryable.add(EditErrorType.QUOTA_EXCEEDED)
    return error_type in retryable


async def _calculate_retry_delay(self, attempt: int, error_type: EditErrorType, error_message: str) -> float:
    """
    Calculate delay before retry

    Args:
        attempt: Current attempt number (0-based)
        error_type: Type of error
        error_message: Error message

    Returns:
        float: Delay in seconds
    """
    if error_type == EditErrorType.QUOTA_EXCEEDED:
        # Prefer the reset time advertised by the API, when short enough.
        reset_time = parse_rate_limit_reset_time(error_message)
        if reset_time and reset_time <= self.retry_config.quota_wait_threshold:
            logger.info(f"⏳ API quota exceeded, waiting {reset_time}s for reset...")
            return reset_time + 5  # Add 5 seconds buffer
        elif reset_time:
            logger.warning(f"⚠️ Quota reset time too long ({reset_time}s), using exponential backoff instead")

    if self.retry_config.exponential_backoff:
        delay = self.retry_config.base_delay * (2 ** attempt)
    else:
        delay = self.retry_config.base_delay
    return min(delay, self.retry_config.max_delay)
async def _edit_image_with_retry(self, request: ImageEditRequest) -> ImageEditResponse:
    """
    Edit image with retry logic

    Args:
        request: Image edit request

    Returns:
        ImageEditResponse: Edit result
    """
    final_failure = None
    total_attempts = self.retry_config.max_retries + 1

    for attempt in range(total_attempts):
        try:
            logger.debug(f"🔄 Attempt {attempt + 1}/{total_attempts}")
            outcome = await self._edit_image_single_attempt(request)

            if outcome.success:
                if attempt > 0:
                    logger.info(f"✅ Success after {attempt + 1} attempts")
                return outcome

            # Non-retryable failures are returned to the caller immediately.
            if not self._should_retry(outcome.error_type, attempt):
                logger.info(f"❌ Error type {outcome.error_type.value} not retryable, giving up")
                return outcome

            wait = await self._calculate_retry_delay(attempt, outcome.error_type, outcome.error_message or "")
            logger.warning(f"⚠️ Attempt {attempt + 1} failed ({outcome.error_type.value}), retrying in {wait:.1f}s...")
            await asyncio.sleep(wait)
            final_failure = outcome

        except Exception as e:
            # Exceptions that escaped the single-attempt wrapper.
            error_type, user_message = classify_api_error(e)
            if not self._should_retry(error_type, attempt):
                logger.error(f"❌ Unexpected error not retryable: {str(e)}")
                raise
            wait = await self._calculate_retry_delay(attempt, error_type, str(e))
            logger.warning(f"⚠️ Unexpected error on attempt {attempt + 1}, retrying in {wait:.1f}s: {str(e)}")
            await asyncio.sleep(wait)

    # All retries exhausted: surface the last failure, or a synthetic one.
    logger.error(f"❌ All {total_attempts} attempts failed")
    return final_failure or ImageEditResponse(
        edited_image_data=b'',
        request=request,
        success=False,
        error_message="All retry attempts failed",
        error_type=EditErrorType.UNKNOWN,
    )
async def _edit_image_single_attempt(self, request: ImageEditRequest) -> ImageEditResponse:
    """
    Single attempt to edit image (without retry logic).

    Never raises for API failures: any exception is classified and folded
    into a failed ImageEditResponse so the retry loop can decide what to do.

    Args:
        request: Image edit request object

    Returns:
        ImageEditResponse: Edit result
    """
    start_time = time.time()
    token_stats = None
    optimization_info = None

    try:
        # Prepare image with auto-optimization
        logger.debug(f"🎨 Starting image edit (single attempt)")
        image_data, optimization_info = await self._prepare_image(
            request.image_path,
            auto_optimize=request.auto_optimize
        )

        # Prepare mask if provided (same optimization pipeline as the image)
        mask_data = None
        mask_optimization_info = None
        if request.mask_path:
            mask_data, mask_optimization_info = await self._prepare_image(
                request.mask_path,
                auto_optimize=request.auto_optimize
            )
            if mask_optimization_info["optimized"]:
                logger.info(f"📋 Mask also optimized: {mask_optimization_info['original_size_mb']:.2f}MB → {mask_optimization_info['final_size_mb']:.2f}MB")
            else:
                logger.info("📋 Mask image prepared")

        # Determine optimal size if not specified (mutates request.size)
        if not request.size:
            width, height = get_image_dimensions(request.image_path)
            request.size = self.config.get_optimal_size(width, height)
            logger.info(f"📐 Auto-selected size: {request.size} based on {width}x{height}")

        # Now validate the request (including token count).
        # NOTE: validate() may auto-truncate request.prompt in place.
        request.validate()

        # Get token statistics
        token_stats = get_prompt_stats(request.prompt, request.size)

        # Log token usage
        logger.debug(f"📊 Token usage: {token_stats['estimated_tokens']}/{token_stats['token_limit']} ({token_stats['usage_percentage']}%)")
        logger.debug(f"💭 Prompt: '{request.prompt[:100]}{'...' if len(request.prompt) > 100 else ''}'")

        # Warn if close to limit
        if token_stats['usage_percentage'] > 90:
            logger.warning(f"⚠️ Token usage is at {token_stats['usage_percentage']}% of limit")

        # Prepare API parameters
        api_params = {
            "model": Config.MODEL,
            "image": image_data,
            "prompt": request.prompt,
            "n": Config.NUMBER_OF_IMAGES,
            "size": request.size,
            # Note: response_format is not supported for image edit API
            "user": self.config.user if self.config.user else None
        }

        # Add mask if provided
        if mask_data:
            api_params["mask"] = mask_data

        # Log request details
        logger.debug(f"📤 Sending edit request to OpenAI API")
        logger.debug(f" Model: {Config.MODEL}")
        logger.debug(f" Quality: {Config.QUALITY}")
        logger.debug(f" Size: {request.size}")
        logger.debug(f" Background: {request.background}")
        if optimization_info["optimized"]:
            logger.debug(f" Image optimized: {optimization_info['method']}")

        # Make API call with enhanced logging
        logger.info(f"📤 Sending edit request to OpenAI API (timeout: {self.config.default_timeout}s)")
        logger.info(f" Image size: {len(image_data) / 1024:.1f}KB")
        logger.info(f" Prompt length: {len(request.prompt)} chars")

        response = await self.client.images.edit(**api_params)

        # Extract edited image data
        if not response.data:
            raise ValueError("No image data in response")

        # Get base64 data (edit API returns b64_json, not a URL)
        image_b64 = response.data[0].b64_json
        if not image_b64:
            raise ValueError("No base64 data in response")

        # Decode base64
        edited_image_data = base64.b64decode(image_b64)

        # Apply background setting if needed (post-processes the returned bytes)
        if request.background == "transparent":
            edited_image_data = ensure_transparent_background(edited_image_data)
        elif request.background == "opaque":
            edited_image_data = ensure_opaque_background(edited_image_data)

        execution_time = time.time() - start_time

        # Get final image dimensions
        final_size = get_image_dimensions_from_bytes(edited_image_data)

        logger.debug(f"✅ Single attempt successful ({execution_time:.1f}s)")
        logger.debug(f" Output size: {final_size[0]}x{final_size[1]}")
        logger.debug(f" Data size: {len(edited_image_data):,} bytes")

        return ImageEditResponse(
            edited_image_data=edited_image_data,
            request=request,
            success=True,
            execution_time=execution_time,
            image_size=final_size,
            token_stats=token_stats,
            optimization_info=optimization_info
        )

    except Exception as e:
        # Convert any failure into a structured, classified response.
        execution_time = time.time() - start_time
        error_type, user_message = classify_api_error(e)

        logger.debug(f"❌ Single attempt failed ({execution_time:.1f}s): {error_type.value}")

        return ImageEditResponse(
            edited_image_data=b'',
            request=request,
            success=False,
            error_message=user_message,
            error_type=error_type,
            execution_time=execution_time,
            token_stats=token_stats,
            optimization_info=optimization_info
        )
async def edit_image(self, request: ImageEditRequest) -> ImageEditResponse:
    """
    Edit an image using OpenAI API with automatic optimization and retry logic

    Args:
        request: Image edit request object

    Returns:
        ImageEditResponse: Edit result
    """
    logger.info(f"🎨 Starting image edit with retry support")
    if self.retry_config.max_retries > 0:
        return await self._edit_image_with_retry(request)
    return await self._edit_image_single_attempt(request)


async def batch_edit(self, requests: List[ImageEditRequest]) -> List[ImageEditResponse]:
    """
    Process multiple edit requests with improved error handling.

    Small images (<= 2MB) are edited concurrently; larger ones run
    sequentially to avoid memory pressure. Results are returned in the
    same order as `requests`.

    Args:
        requests: List of edit requests

    Returns:
        List of edit responses (one per request, same order)

    Raises:
        ValueError: If more than 16 requests are submitted
    """
    if len(requests) > 16:
        raise ValueError("Maximum 16 images can be edited in a batch")

    logger.info(f"📦 Starting batch edit for {len(requests)} images")

    # Fix: partition by POSITION, not by value. ImageEditRequest is an
    # eq-comparable dataclass, so the previous `req in small_requests`
    # membership test mis-paired results (and could raise IndexError)
    # whenever the batch contained duplicate requests — and was O(n²).
    small_indices: List[int] = []
    large_indices: List[int] = []
    for idx, req in enumerate(requests):
        try:
            size_mb = get_file_size_mb(req.image_path)
        except Exception as e:
            logger.warning(f"Could not check size of {req.image_path}: {e}")
            small_indices.append(idx)  # Default to small group
            continue
        (large_indices if size_mb > 2.0 else small_indices).append(idx)  # 2MB threshold

    results: List[Optional[ImageEditResponse]] = [None] * len(requests)

    # Process small images concurrently
    small_outcomes = await asyncio.gather(
        *(self.edit_image(requests[i]) for i in small_indices),
        return_exceptions=True,
    )
    for i, outcome in zip(small_indices, small_outcomes):
        if isinstance(outcome, Exception):
            # Convert exceptions from gather into failed responses.
            error_type, user_message = classify_api_error(outcome)
            outcome = ImageEditResponse(
                edited_image_data=b'',
                request=requests[i],
                success=False,
                error_message=user_message,
                error_type=error_type,
            )
        results[i] = outcome

    # Process large images sequentially to avoid memory issues
    for i in large_indices:
        req = requests[i]
        try:
            results[i] = await self.edit_image(req)
            # Brief pause for memory cleanup
            await asyncio.sleep(0.1)
        except Exception as e:
            logger.error(f"Error processing large image {req.image_path}: {e}")
            error_type, user_message = classify_api_error(e)
            results[i] = ImageEditResponse(
                edited_image_data=b'',
                request=req,
                success=False,
                error_message=user_message,
                error_type=error_type,
            )

    all_results = results  # fully populated; order matches `requests`

    # Log summary
    successful = sum(1 for r in all_results if r.success)
    optimized = sum(1 for r in all_results if r.optimization_info and r.optimization_info.get("optimized", False))
    total_tokens = sum(r.token_stats.get('estimated_tokens', 0) for r in all_results if r.token_stats)

    logger.info(f"✅ Batch edit complete: {successful}/{len(requests)} successful")
    if optimized > 0:
        logger.info(f"🔄 {optimized} images were auto-optimized")
    logger.info(f"📊 Total tokens used: ~{total_tokens}")

    return all_results


def health_check(self) -> bool:
    """Cheap liveness check: client constructed and config still valid."""
    try:
        return self.client is not None and self.config.validate()
    except Exception:
        return False
logging.getLogger(__name__) + + +def sanitize_args_for_logging(args: Dict[str, Any]) -> Dict[str, Any]: + """ + Remove sensitive data from arguments for logging + + Args: + args: Original arguments + + Returns: + dict: Sanitized arguments + """ + safe_args = args.copy() + + # Don't log full image data + if 'image_data' in safe_args: + safe_args['image_data'] = f"<{len(safe_args['image_data'])} bytes>" + + # Truncate long prompts + if 'prompt' in safe_args and len(safe_args['prompt']) > 100: + safe_args['prompt'] = safe_args['prompt'][:100] + '...' + + return safe_args + + +def get_file_extension(file_path: str) -> str: + """Get file extension from path""" + return Path(file_path).suffix[1:].lower() if Path(file_path).suffix else 'png' + + +class ToolHandlers: + """Handler class for MCP tools""" + + def __init__(self, config: Config): + """Initialize handlers with configuration""" + self.config = config + self.client = OpenAIEditClient(config) + self.current_seed = None # Track current seed for session + + def _get_or_create_seed(self) -> int: + """Get current seed or create new one""" + if self.current_seed is None: + self.current_seed = random.randint(0, 999999) + return self.current_seed + + def _reset_seed(self): + """Reset seed for new session""" + self.current_seed = None + + def _save_b64_to_temp_file(self, b64_data: str, filename: str) -> str: + """Save base64 data to a temporary file with specified filename + + Args: + b64_data: Base64 encoded image data (can be PNG, JPEG, WebP, etc.) 
def _save_b64_to_temp_file(self, b64_data: str, filename: str) -> str:
    """Save base64 data to a temporary file with specified filename

    Args:
        b64_data: Base64 encoded image data (can be PNG, JPEG, WebP, etc.)
        filename: Desired filename for the file

    Returns:
        str: Path to saved file

    Raises:
        RuntimeError: If the decoded image cannot be written to disk
    """
    try:
        # Decode base64 data
        image_data = decode_image_base64(b64_data)

        # Save to local temp directory for processing
        temp_dir = self.config.base_path / 'temp'
        temp_dir.mkdir(exist_ok=True)
        file_path = temp_dir / filename

        if not save_image(image_data, str(file_path)):
            # Fix: report the actual target path (the message previously
            # contained a literal placeholder instead of the path).
            raise RuntimeError(f"Failed to save image to temp file: {file_path}")

        logger.info(f"Saved temp file: {file_path} ({len(image_data) / 1024:.1f} KB)")

        return str(file_path)
    except Exception as e:
        logger.error(f"Error saving b64 to temp file: {e}")
        raise


def _move_temp_to_generated(self, temp_file_path: str, base_name: str, index: int, extension: str = None) -> str:
    """
    Copy a file from the temp directory into generated_images.

    Despite the name, the source file is COPIED (preserved in temp for
    potential reuse), not moved.

    Args:
        temp_file_path: Path to temporary file
        base_name: Base name for the destination file
        index: Index for the file (0 for input, 1+ for output)
        extension: File extension (detected from temp file if not provided)

    Returns:
        str: Path to moved file in generated_images directory

    Raises:
        FileNotFoundError: If the temp file does not exist
        RuntimeError: If the copy fails or post-copy verification fails
    """
    try:
        # Ensure output directory exists
        self.config.ensure_output_directory()

        temp_path = Path(temp_file_path)

        # Verify source file exists
        if not temp_path.exists():
            raise FileNotFoundError(f"Temp file not found: {temp_file_path}")

        # Detect extension from temp file if not provided
        if extension is None:
            extension = temp_path.suffix[1:] if temp_path.suffix else 'png'

        # Generate destination filename
        dest_filename = self.config.generate_filename(base_name, index, extension)
        dest_path = self.config.generated_images_path / dest_filename

        # Copy file; shutil is already imported at module level, the
        # redundant function-local import was removed.
        try:
            shutil.copy2(temp_file_path, dest_path)

            # Verify copy was successful
            if not dest_path.exists():
                raise RuntimeError(f"File copy verification failed: {dest_path}")

            # Check file sizes match
            if temp_path.stat().st_size != dest_path.stat().st_size:
                raise RuntimeError(f"File copy size mismatch: {temp_path.stat().st_size} != {dest_path.stat().st_size}")

        except PermissionError as e:
            raise RuntimeError(f"Permission denied copying file to {dest_path}: {e}")
        except shutil.Error as e:
            raise RuntimeError(f"Copy operation failed: {e}")

        logger.info(f"Moved temp file to generated_images: {temp_path.name} → {dest_filename}")

        return str(dest_path)

    except Exception as e:
        logger.error(f"Error moving temp file to generated_images: {e}")
        raise


def _copy_temp_to_generated(self, temp_file_path: str, base_name: str, index: int, extension: str = None) -> str:
    """
    Copy file from temp directory to generated_images directory (alias for _move_temp_to_generated)
    """
    return self._move_temp_to_generated(temp_file_path, base_name, index, extension)
File: {image_name}") + + except Exception as e: + logger.error(f"Failed to read {image_name} from Claude temp: {e}") + raise RuntimeError(f"❌ Cannot access Claude temp file: {image_name}") from e + + + + async def handle_edit_image(self, arguments: Dict[str, Any]) -> List[TextContent | ImageContent]: + """ + Handle edit_image tool call + + Args: + arguments: Tool arguments + + Returns: + List of content items + """ + try: + # Check for input_image_b64 parameter + if 'input_image_b64' not in arguments: + return [TextContent( + type="text", + text="❌ input_image_b64 is required" + )] + + # Get or generate image name + image_name = arguments.get('input_image_name', f'temp_image_{random.randint(1000, 9999)}.png') + + # Save base64 image to temp directory with specified name + image_path = self._save_b64_to_temp_file( + arguments['input_image_b64'], + image_name + ) + logger.info(f"Input image saved to temp: {image_name}") + + # Validate the saved image + is_valid, size_mb, error_msg = validate_image_file(image_path, self.config.max_image_size_mb) + if not is_valid: + return [TextContent( + type="text", + text=f"❌ Image validation failed: {error_msg}" + )] + + # Generate base name and copy temp file to generated_images + base_name = self.config.generate_base_name_simple() + + # Copy temp file to generated_images as {base_name}_000.ext + input_generated_path = self._copy_temp_to_generated(image_path, base_name, 0) + logger.info(f"Input file copied to generated_images: {Path(input_generated_path).name}") + + # Sanitize prompt + prompt = sanitize_prompt(arguments['prompt']) + + # Create edit request (use generated path for processing) + request = ImageEditRequest( + image_path=input_generated_path, # Use generated path instead of temp + prompt=prompt, + background=arguments.get('background', 'transparent') + ) + + # Process edit + response = await self.client.edit_image(request) + + if not response.success: + return [TextContent( + type="text", + text=f"❌ Edit failed: 
{response.error_message}" + )] + + # Save output image as {base_name}_001.png + saved_path = None + json_path = None + if arguments.get('save_to_file', True): + output_path = self.config.get_output_path(base_name, 1, 'png') + + if save_image(response.edited_image_data, str(output_path)): + saved_path = str(output_path) + + # Save parameters as {base_name}_001.json + if self.config.save_parameters: + params_dict = { + "base_name": base_name, + "timestamp": datetime.now().isoformat(), + "prompt": request.prompt, + "background": request.background, + "input_image_name": image_name, + "input_temp_path": image_path, + "input_generated_path": input_generated_path, + "input_size": get_image_dimensions(input_generated_path), + "output_size": response.image_size, + "execution_time": response.execution_time, + "optimization": response.optimization_info if response.optimization_info else None, + "token_stats": response.token_stats if response.token_stats else None, + "config": { + "model": Config.MODEL, + "quality": Config.QUALITY, + "api_version": "gpt-image-1" + } + } + + json_path = self.config.get_output_path(base_name, 1, 'json') + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(params_dict, f, indent=2, ensure_ascii=False) + logger.info(f"Parameters saved to: {json_path}") + + # Prepare response + contents = [] + + # Add text description + text = f"✅ Image edited successfully!\n" + text += f"📝 Input: {image_name}\n" + text += f"📁 Base name: {base_name}\n" + text += f"📐 Size: {response.image_size[0]}x{response.image_size[1]}\n" + text += f"⏱️ Processing time: {response.execution_time:.1f}s\n" + + # Add optimization info if image was optimized + if response.optimization_info and response.optimization_info["optimized"]: + opt_info = response.optimization_info + text += f"\n🔄 Image was auto-optimized:\n" + text += f" Original: {opt_info['original_size_mb']:.2f}MB\n" + text += f" Optimized: {opt_info['final_size_mb']:.2f}MB\n" + text += f" Format: 
{opt_info['format_used']}\n" + + if saved_path: + text += f"\n💾 Output: {Path(saved_path).name}" + text += f"\n📝 Input: {Path(input_generated_path).name}" # Show generated file + if json_path: + text += f"\n📋 Parameters: {Path(json_path).name}" + + contents.append(TextContent(type="text", text=text)) + + # Add image preview + image_b64 = encode_image_base64(response.edited_image_data) + contents.append(ImageContent( + type="image", + data=image_b64, + mimeType="image/png" + )) + + # Reset seed for next session + self._reset_seed() + + return contents + + except Exception as e: + logger.error(f"Error in handle_edit_image: {e}", exc_info=True) + return [TextContent( + type="text", + text=f"❌ Unexpected error: {str(e)}" + )] + + async def handle_edit_with_mask(self, arguments: Dict[str, Any]) -> List[TextContent | ImageContent]: + """ + Handle edit_with_mask tool call + + Args: + arguments: Tool arguments + + Returns: + List of content items + """ + try: + # Validate required parameters + if 'input_image_b64' not in arguments: + return [TextContent( + type="text", + text="❌ input_image_b64 is required" + )] + if 'input_mask_b64' not in arguments: + return [TextContent( + type="text", + text="❌ input_mask_b64 is required for masked editing" + )] + + # Get or generate image names + image_name = arguments.get('input_image_name', f'temp_image_{random.randint(1000, 9999)}.png') + mask_name = arguments.get('mask_image_name', f'temp_mask_{random.randint(1000, 9999)}.png') + + # Save base64 images to temp directory with specified names + image_path = self._save_b64_to_temp_file( + arguments['input_image_b64'], + image_name + ) + logger.info(f"Input image saved to temp: {image_name}") + + # Save base64 mask to temp directory + mask_path = self._save_b64_to_temp_file( + arguments['input_mask_b64'], + mask_name + ) + logger.info(f"Mask image saved to temp: {mask_name}") + + # Validate the saved images + is_valid, size_mb, error_msg = validate_image_file(image_path, 
self.config.max_image_size_mb) + if not is_valid: + return [TextContent( + type="text", + text=f"❌ Image validation failed: {error_msg}" + )] + + # Generate base name and move temp files to generated_images + base_name = self.config.generate_base_name_simple() + + # Copy temp files to generated_images + input_generated_path = self._copy_temp_to_generated(image_path, base_name, 0) + mask_generated_path = self._copy_temp_to_generated(mask_path, f"{base_name}_mask", 0, 'png') # Force PNG for mask + + logger.info(f"Input file copied to generated_images: {Path(input_generated_path).name}") + logger.info(f"Mask file copied to generated_images: {Path(mask_generated_path).name}") + + # Sanitize prompt + prompt = sanitize_prompt(arguments['prompt']) + + # Create edit request with mask (use generated paths) + request = ImageEditRequest( + image_path=input_generated_path, + prompt=prompt, + mask_path=mask_generated_path, + background=arguments.get('background', 'transparent') + ) + + # Process edit + response = await self.client.edit_image(request) + + if not response.success: + return [TextContent( + type="text", + text=f"❌ Masked edit failed: {response.error_message}" + )] + + # Save output image + saved_path = None + json_path = None + if arguments.get('save_to_file', True): + output_path = self.config.get_output_path(base_name, 1, 'png') + + if save_image(response.edited_image_data, str(output_path)): + saved_path = str(output_path) + + # Save parameters + if self.config.save_parameters: + params_dict = { + "base_name": base_name, + "timestamp": datetime.now().isoformat(), + "prompt": request.prompt, + "background": request.background, + "input_image_name": image_name, + "mask_image_name": mask_name, + "input_temp_path": image_path, + "mask_temp_path": mask_path, + "input_generated_path": input_generated_path, + "mask_generated_path": mask_generated_path, + "input_size": get_image_dimensions(input_generated_path), + "output_size": response.image_size, + "execution_time": 
response.execution_time + } + + json_path = self.config.get_output_path(base_name, 1, 'json') + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(params_dict, f, indent=2, ensure_ascii=False) + + # Prepare response + contents = [] + + text = f"✅ Image edited with mask successfully!\n" + text += f"📝 Input: {image_name}\n" + text += f"🎭 Mask: {mask_name}\n" + text += f"📁 Base name: {base_name}\n" + text += f"📐 Size: {response.image_size[0]}x{response.image_size[1]}\n" + text += f"⏱️ Processing time: {response.execution_time:.1f}s\n" + + if saved_path: + text += f"\n💾 Output: {Path(saved_path).name}" + if json_path: + text += f"\n📋 Parameters: {Path(json_path).name}" + + contents.append(TextContent(type="text", text=text)) + + # Add image preview + image_b64 = encode_image_base64(response.edited_image_data) + contents.append(ImageContent( + type="image", + data=image_b64, + mimeType="image/png" + )) + + # Reset seed for next session + self._reset_seed() + + return contents + + except Exception as e: + logger.error(f"Error in handle_edit_with_mask: {e}", exc_info=True) + return [TextContent( + type="text", + text=f"❌ Unexpected error: {str(e)}" + )] + + async def handle_batch_edit(self, arguments: Dict[str, Any]) -> List[TextContent]: + """ + Handle batch_edit tool call + + Args: + arguments: Tool arguments + + Returns: + List of content items + """ + try: + # Validate batch parameters + edits = arguments.get('edits', []) + is_valid, error_msg = validate_batch_parameters(edits) + if not is_valid: + return [TextContent( + type="text", + text=f"❌ Batch validation failed: {error_msg}" + )] + + # Get seed and generate base name for this batch + seed = self._get_or_create_seed() + base_name = self.config.generate_base_name(seed) + + # Create edit requests + requests = [] + temp_files_data = {} # Store Claude temp file data + + # First, read all files from Claude temp + for edit_params in edits: + image_name = edit_params['input_image_name'] + try: + image_data = 
await self._read_claude_temp_file(image_name) + temp_files_data[image_name] = image_data + except Exception as e: + return [TextContent( + type="text", + text=f"❌ Failed to read {image_name} from Claude temp: {str(e)}" + )] + + # Check if mask is specified + if 'mask_image_name' in edit_params: + mask_name = edit_params['mask_image_name'] + try: + mask_data = await self._read_claude_temp_file(mask_name) + temp_files_data[mask_name] = mask_data + except Exception as e: + return [TextContent( + type="text", + text=f"❌ Failed to read mask {mask_name} from Claude temp: {str(e)}" + )] + + # Now create requests using the read data + for edit_params in edits: + image_name = edit_params['input_image_name'] + + # Save temp data to local temp file for processing + local_temp_path = self._save_b64_to_temp_file( + encode_image_base64(temp_files_data[image_name]), + image_name + ) + + # Check if mask is specified + mask_path = None + if 'mask_image_name' in edit_params: + mask_name = edit_params['mask_image_name'] + mask_path = self._save_b64_to_temp_file( + encode_image_base64(temp_files_data[mask_name]), + mask_name + ) + + request = ImageEditRequest( + image_path=local_temp_path, + prompt=sanitize_prompt(edit_params['prompt']), + mask_path=mask_path, + background=edit_params.get('background', 'transparent') + ) + requests.append(request) + + # Process batch + logger.info(f"Processing batch of {len(requests)} images with base name {base_name}") + responses = await self.client.batch_edit(requests) + + # Save images and prepare results + results = [] + save_to_file = arguments.get('save_to_file', True) + + for i, response in enumerate(responses): + result = { + "index": i + 1, + "success": response.success, + "input_image_name": edits[i]['input_image_name'] + } + + if response.success: + result["size"] = f"{response.image_size[0]}x{response.image_size[1]}" + result["execution_time"] = response.execution_time + + if save_to_file: + # Save with incrementing numbers + output_path 
= self.config.get_output_path(base_name, i + 1, 'png') + + if save_image(response.edited_image_data, str(output_path)): + result["saved_path"] = str(output_path) + + # Save parameters for this image + if self.config.save_parameters: + params_dict = { + "base_name": base_name, + "seed": seed, + "timestamp": datetime.now().isoformat(), + "batch_index": i + 1, + "prompt": edits[i]['prompt'], + "background": edits[i].get('background', 'transparent'), + "input_image_name": edits[i]['input_image_name'], + "mask_image_name": edits[i].get('mask_image_name'), + "output_size": response.image_size, + "execution_time": response.execution_time + } + + json_path = self.config.get_output_path(base_name, i + 1, 'json') + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(params_dict, f, indent=2, ensure_ascii=False) + else: + result["error"] = response.error_message + + results.append(result) + + # Prepare summary + successful = sum(1 for r in results if r.get("success")) + failed = len(results) - successful + + text = f"📦 Batch Edit Complete\n" + text += f"🎲 Seed: {seed}\n" + text += f"📁 Base name: {base_name}\n" + text += f"✅ Successful: {successful}/{len(results)}\n" + if failed > 0: + text += f"❌ Failed: {failed}\n" + text += "\n📋 Results:\n" + text += "```json\n" + text += json.dumps(results, indent=2) + text += "\n```" + + # Reset seed for next session + self._reset_seed() + + return [TextContent(type="text", text=text)] + + except Exception as e: + logger.error(f"Error in handle_batch_edit: {e}", exc_info=True) + self._reset_seed() + return [TextContent( + type="text", + text=f"❌ Batch processing error: {str(e)}" + )] + + async def handle_validate_image(self, arguments: Dict[str, Any]) -> List[TextContent]: + """ + Handle validate_image tool call + + Args: + arguments: Tool arguments + + Returns: + List of content items + """ + try: + image_path = arguments.get('image_path') + if not image_path: + return [TextContent( + type="text", + text="❌ image_path is 
required" + )] + + # Validate image + is_valid, size_mb, error_msg = validate_image_file( + image_path, + self.config.max_image_size_mb + ) + + # Get additional info if valid + if is_valid: + from ..utils import get_image_dimensions + width, height = get_image_dimensions(image_path) + + text = f"✅ Image validation passed!\n" + text += f"📁 File: {Path(image_path).name}\n" + text += f"📐 Dimensions: {width}x{height}\n" + text += f"💾 Size: {size_mb:.2f}MB\n" + text += f"🎯 Optimal edit size: {self.config.get_optimal_size(width, height)}" + else: + text = f"❌ Image validation failed!\n" + text += f"📁 File: {Path(image_path).name}\n" + text += f"⚠️ Issue: {error_msg}" + + return [TextContent(type="text", text=text)] + + except Exception as e: + logger.error(f"Error in handle_validate_image: {e}", exc_info=True) + return [TextContent( + type="text", + text=f"❌ Validation error: {str(e)}" + )] + + async def handle_create_mask_from_alpha(self, arguments: Dict[str, Any]) -> List[TextContent]: + """ + Handle create_mask_from_alpha tool call + + Args: + arguments: Tool arguments + + Returns: + List of content items + """ + try: + image_path = arguments.get('image_path') + output_path = arguments.get('output_path') + + if not image_path: + return [TextContent( + type="text", + text="❌ image_path is required" + )] + + # Generate output path if not provided + if not output_path: + seed = random.randint(0, 999999) + base_name = self.config.generate_base_name(seed) + output_path = str(self.config.get_output_path(f"{base_name}_mask", 0, 'png')) + + # Create mask from alpha channel + mask_data = create_mask_from_alpha(image_path) + + if not mask_data: + return [TextContent( + type="text", + text="❌ Failed to create mask. Image may not have an alpha channel." 
+ )] + + # Save mask + if save_image(mask_data, output_path): + text = f"✅ Mask created successfully!\n" + text += f"📁 Source: {Path(image_path).name}\n" + text += f"💾 Saved to: {Path(output_path).name}" + else: + text = "❌ Failed to save mask image" + + return [TextContent(type="text", text=text)] + + except Exception as e: + logger.error(f"Error in handle_create_mask_from_alpha: {e}", exc_info=True) + return [TextContent( + type="text", + text=f"❌ Mask creation error: {str(e)}" + )] + + async def handle_edit_image_from_file(self, arguments: Dict[str, Any]) -> List[TextContent | ImageContent]: + """ + Handle edit_image_from_file tool call + - Read file from INPUT_PATH directory specified by input_image_name + - If file size > MAX_IMAGE_SIZE_MB, convert to WebP lossy format + - Save converted file to generated_images as {base_name}_000.png + - Edit the image using OpenAI + - Save result to GENERATED_IMAGES_PATH as {base_name}_001.png (output) + + Args: + arguments: Tool arguments + + Returns: + List of content items + """ + try: + input_image_name = arguments.get('input_image_name') + + if not input_image_name: + return [TextContent( + type="text", + text="❌ input_image_name is required" + )] + + # Read from INPUT_PATH directory + input_file_path = self.config.input_path / input_image_name + + if not input_file_path.exists(): + # Enhanced error message with debug info + error_text = f"❌ File not found in input directory: {input_image_name}\n" + error_text += f"📁 Looking in: {self.config.input_path}\n" + error_text += f"🔍 Full path: {input_file_path}\n" + error_text += f"📂 Input directory exists: {self.config.input_path.exists()}\n" + + # List available files in input directory + if self.config.input_path.exists(): + files = [f.name for f in self.config.input_path.iterdir() if f.is_file()] + if files: + error_text += f"📋 Available files: {', '.join(files[:10])}" # Show max 10 files + if len(files) > 10: + error_text += f" and {len(files) - 10} more..." 
+ else: + error_text += "📋 No files found in input directory" + else: + error_text += "⚠️ Input directory does not exist" + + return [TextContent( + type="text", + text=error_text + )] + + try: + with open(input_file_path, 'rb') as f: + image_data = f.read() + logger.info(f"📁 Read image from INPUT_PATH: {input_image_name} ({len(image_data) / 1024:.1f} KB)") + except Exception as e: + return [TextContent( + type="text", + text=f"❌ Failed to read file from INPUT_PATH: {str(e)}" + )] + + # Check file size and optimize if needed + original_size_mb = get_file_size_mb(input_file_path) + optimization_info = None + + if original_size_mb > self.config.max_image_size_mb: + logger.info(f"🔄 File size ({original_size_mb:.2f}MB) exceeds limit ({self.config.max_image_size_mb}MB), converting to WebP...") + + try: + # Convert to WebP lossy format (lower initial quality) + optimized_data = optimize_image_to_size_limit( + str(input_file_path), + max_size_mb=self.config.max_image_size_mb, + format='WEBP', + initial_quality=75 # Lowered from 85 to 75 + ) + + optimized_size_mb = len(optimized_data) / (1024 * 1024) + + # Generate base name and save optimized image to generated_images as {base_name}_000.png + base_name = self.config.generate_base_name_simple() + self.config.ensure_output_directory() + + # Save optimized WebP as PNG in generated_images + optimized_path = self.config.get_output_path(base_name, 0, 'png') + + if save_image(optimized_data, str(optimized_path)): + logger.info(f"✅ Optimized image saved as: {optimized_path.name}") + + # Use optimized data for OpenAI API + image_data = optimized_data + + optimization_info = { + "optimized": True, + "original_size_mb": original_size_mb, + "final_size_mb": optimized_size_mb, + "format_used": "WEBP", + "saved_as": str(optimized_path.name) + } + else: + return [TextContent( + type="text", + text=f"❌ Failed to save optimized image to {optimized_path}" + )] + + except Exception as e: + logger.error(f"Image optimization failed: {e}") + 
return [TextContent( + type="text", + text=f"❌ Image optimization failed: {str(e)}" + )] + else: + # Generate base name and save original to generated_images as {base_name}_000.png + base_name = self.config.generate_base_name_simple() + self.config.ensure_output_directory() + + # Save original as PNG in generated_images + original_path = self.config.get_output_path(base_name, 0, 'png') + + if save_image(image_data, str(original_path)): + logger.info(f"✅ Original image saved as: {original_path.name}") + else: + return [TextContent( + type="text", + text=f"❌ Failed to save original image to {original_path}" + )] + + # Convert image data to base64 for OpenAI API + input_image_b64 = encode_image_base64(image_data) + + # Create edit request directly (not using handle_edit_image to avoid duplicate saving) + prompt = sanitize_prompt(arguments['prompt']) + + request = ImageEditRequest( + image_path=str(input_file_path), # Use original path for processing + prompt=prompt, + background=arguments.get('background', 'transparent') + ) + + # Override the image data in the request with our optimized data + # We need to modify the client to accept image_data directly + # For now, save temp file and use that path + temp_optimized_path = self.config.base_path / 'temp' / f"optimized_{input_image_name}" + temp_optimized_path.parent.mkdir(exist_ok=True) + + if save_image(image_data, str(temp_optimized_path)): + request.image_path = str(temp_optimized_path) + + # Process edit + response = await self.client.edit_image(request) + + if not response.success: + return [TextContent( + type="text", + text=f"❌ Edit failed: {response.error_message}" + )] + + # Save output image as {base_name}_001.png + saved_path = None + json_path = None + if arguments.get('save_to_file', True): + output_path = self.config.get_output_path(base_name, 1, 'png') + + if save_image(response.edited_image_data, str(output_path)): + saved_path = str(output_path) + + # Save parameters as {base_name}_001.json + if 
self.config.save_parameters: + params_dict = { + "base_name": base_name, + "timestamp": datetime.now().isoformat(), + "prompt": request.prompt, + "background": request.background, + "input_image_name": input_image_name, + "input_file_path": str(input_file_path), + "input_size": get_image_dimensions(str(input_file_path)), + "output_size": response.image_size, + "execution_time": response.execution_time, + "optimization": optimization_info, + "token_stats": response.token_stats if response.token_stats else None, + "config": { + "model": Config.MODEL, + "quality": Config.QUALITY, + "api_version": "gpt-image-1" + } + } + + json_path = self.config.get_output_path(base_name, 1, 'json') + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(params_dict, f, indent=2, ensure_ascii=False) + logger.info(f"Parameters saved to: {json_path}") + + # Clean up temporary optimized file + if temp_optimized_path.exists(): + try: + temp_optimized_path.unlink() + except Exception: + pass # Ignore cleanup errors + + # Prepare response + contents = [] + + # Add text description + text = f"✅ Image edited successfully from file!\n" + text += f"📝 Input: {input_image_name}\n" + text += f"📁 Base name: {base_name}\n" + text += f"📐 Size: {response.image_size[0]}x{response.image_size[1]}\n" + text += f"⏱️ Processing time: {response.execution_time:.1f}s\n" + + # Add optimization info if image was optimized + if optimization_info and optimization_info["optimized"]: + text += f"\n🔄 Image was auto-optimized:\n" + text += f" Original: {optimization_info['original_size_mb']:.2f}MB\n" + text += f" Optimized: {optimization_info['final_size_mb']:.2f}MB\n" + text += f" Format: {optimization_info['format_used']}\n" + text += f" Saved as: {optimization_info['saved_as']}\n" + + if saved_path: + text += f"\n💾 Output: {Path(saved_path).name}" + if json_path: + text += f"\n📋 Parameters: {Path(json_path).name}" + + contents.append(TextContent(type="text", text=text)) + + # Add image preview + image_b64 = 
encode_image_base64(response.edited_image_data) + contents.append(ImageContent( + type="image", + data=image_b64, + mimeType="image/png" + )) + + # Reset seed for next session + self._reset_seed() + + return contents + + except Exception as e: + logger.error(f"Error in handle_edit_image_from_file: {e}", exc_info=True) + return [TextContent( + type="text", + text=f"❌ File-based image edit error: {str(e)}" + )] + + async def handle_edit_with_mask_from_file(self, arguments: Dict[str, Any]) -> List[TextContent | ImageContent]: + """ + Handle edit_with_mask_from_file tool call + - Read files from INPUT_PATH directory specified by input_image_name and mask_image_name + - If file size > MAX_IMAGE_SIZE_MB, convert to WebP lossy format + - Save converted files to generated_images as {base_name}_000.png and {base_name}_mask_000.png + - Edit the image using OpenAI with mask + - Save result to GENERATED_IMAGES_PATH as {base_name}_001.png (output) + + Args: + arguments: Tool arguments + + Returns: + List of content items + """ + try: + input_image_name = arguments.get('input_image_name') + mask_image_name = arguments.get('mask_image_name') + + if not input_image_name: + return [TextContent( + type="text", + text="❌ input_image_name is required" + )] + + if not mask_image_name: + return [TextContent( + type="text", + text="❌ mask_image_name is required" + )] + + # Read image from INPUT_PATH directory + input_file_path = self.config.input_path / input_image_name + mask_file_path = self.config.input_path / mask_image_name + + if not input_file_path.exists(): + # Enhanced error message with debug info + error_text = f"❌ Image file not found in input directory: {input_image_name}\n" + error_text += f"📁 Looking in: {self.config.input_path}\n" + error_text += f"🔍 Full path: {input_file_path}\n" + error_text += f"📂 Input directory exists: {self.config.input_path.exists()}\n" + + # List available files in input directory + if self.config.input_path.exists(): + files = [f.name for f in 
self.config.input_path.iterdir() if f.is_file()] + if files: + error_text += f"📋 Available files: {', '.join(files[:10])}" # Show max 10 files + if len(files) > 10: + error_text += f" and {len(files) - 10} more..." + else: + error_text += "📋 No files found in input directory" + else: + error_text += "⚠️ Input directory does not exist" + + return [TextContent( + type="text", + text=error_text + )] + + if not mask_file_path.exists(): + # Enhanced error message with debug info + error_text = f"❌ Mask file not found in input directory: {mask_image_name}\n" + error_text += f"📁 Looking in: {self.config.input_path}\n" + error_text += f"🔍 Full path: {mask_file_path}\n" + error_text += f"📂 Input directory exists: {self.config.input_path.exists()}\n" + + # List available files in input directory + if self.config.input_path.exists(): + files = [f.name for f in self.config.input_path.iterdir() if f.is_file()] + if files: + error_text += f"📋 Available files: {', '.join(files[:10])}" # Show max 10 files + if len(files) > 10: + error_text += f" and {len(files) - 10} more..." 
+ else: + error_text += "📋 No files found in input directory" + else: + error_text += "⚠️ Input directory does not exist" + + return [TextContent( + type="text", + text=error_text + )] + + try: + with open(input_file_path, 'rb') as f: + image_data_bytes = f.read() + logger.info(f"📁 Read image from INPUT_PATH: {input_image_name} ({len(image_data_bytes) / 1024:.1f} KB)") + except Exception as e: + return [TextContent( + type="text", + text=f"❌ Failed to read image file from INPUT_PATH: {str(e)}" + )] + + try: + with open(mask_file_path, 'rb') as f: + mask_data_bytes = f.read() + logger.info(f"📁 Read mask from INPUT_PATH: {mask_image_name} ({len(mask_data_bytes) / 1024:.1f} KB)") + except Exception as e: + return [TextContent( + type="text", + text=f"❌ Failed to read mask file from INPUT_PATH: {str(e)}" + )] + + # Check image file size and optimize if needed + original_size_mb = get_file_size_mb(input_file_path) + image_optimization_info = None + + if original_size_mb > self.config.max_image_size_mb: + logger.info(f"🔄 Image file size ({original_size_mb:.2f}MB) exceeds limit ({self.config.max_image_size_mb}MB), converting to WebP...") + + try: + # Convert to WebP lossy format + optimized_data = optimize_image_to_size_limit( + str(input_file_path), + max_size_mb=self.config.max_image_size_mb, + format='WEBP', + initial_quality=85 + ) + + optimized_size_mb = len(optimized_data) / (1024 * 1024) + + # Generate base name and save optimized image + base_name = self.config.generate_base_name_simple() + self.config.ensure_output_directory() + + # Save optimized WebP as PNG in generated_images + optimized_path = self.config.get_output_path(base_name, 0, 'png') + + if save_image(optimized_data, str(optimized_path)): + logger.info(f"✅ Optimized image saved as: {optimized_path.name}") + + # Use optimized data for OpenAI API + image_data_bytes = optimized_data + + image_optimization_info = { + "optimized": True, + "original_size_mb": original_size_mb, + "final_size_mb": 
optimized_size_mb, + "format_used": "WEBP", + "saved_as": str(optimized_path.name) + } + else: + return [TextContent( + type="text", + text=f"❌ Failed to save optimized image to {optimized_path}" + )] + + except Exception as e: + logger.error(f"Image optimization failed: {e}") + return [TextContent( + type="text", + text=f"❌ Image optimization failed: {str(e)}" + )] + else: + # Generate base name and save original to generated_images as {base_name}_000.png + base_name = self.config.generate_base_name_simple() + self.config.ensure_output_directory() + + # Save original as PNG in generated_images + original_path = self.config.get_output_path(base_name, 0, 'png') + + if save_image(image_data_bytes, str(original_path)): + logger.info(f"✅ Original image saved as: {original_path.name}") + else: + return [TextContent( + type="text", + text=f"❌ Failed to save original image to {original_path}" + )] + + # Save mask file as {base_name}_mask_000.png + mask_path = self.config.get_output_path(f"{base_name}_mask", 0, 'png') + + if save_image(mask_data_bytes, str(mask_path)): + logger.info(f"✅ Mask saved as: {mask_path.name}") + else: + return [TextContent( + type="text", + text=f"❌ Failed to save mask to {mask_path}" + )] + + # Create edit request directly + prompt = sanitize_prompt(arguments['prompt']) + + # Create temporary files for processing + temp_image_path = self.config.base_path / 'temp' / f"optimized_{input_image_name}" + temp_mask_path = self.config.base_path / 'temp' / f"mask_{mask_image_name}" + temp_image_path.parent.mkdir(exist_ok=True) + + # Save temp files for processing + if not (save_image(image_data_bytes, str(temp_image_path)) and + save_image(mask_data_bytes, str(temp_mask_path))): + return [TextContent( + type="text", + text="❌ Failed to save temporary files for processing" + )] + + request = ImageEditRequest( + image_path=str(temp_image_path), + prompt=prompt, + mask_path=str(temp_mask_path), + background=arguments.get('background', 'transparent') + ) + + 
# Process edit + response = await self.client.edit_image(request) + + if not response.success: + # Clean up temp files + for temp_file in [temp_image_path, temp_mask_path]: + if temp_file.exists(): + try: + temp_file.unlink() + except Exception: + pass + + return [TextContent( + type="text", + text=f"❌ Masked edit failed: {response.error_message}" + )] + + # Save output image as {base_name}_001.png + saved_path = None + json_path = None + if arguments.get('save_to_file', True): + output_path = self.config.get_output_path(base_name, 1, 'png') + + if save_image(response.edited_image_data, str(output_path)): + saved_path = str(output_path) + + # Save parameters + if self.config.save_parameters: + params_dict = { + "base_name": base_name, + "timestamp": datetime.now().isoformat(), + "prompt": request.prompt, + "background": request.background, + "input_image_name": input_image_name, + "mask_image_name": mask_image_name, + "input_file_path": str(input_file_path), + "mask_file_path": str(mask_file_path), + "input_size": get_image_dimensions(str(input_file_path)), + "output_size": response.image_size, + "execution_time": response.execution_time, + "image_optimization": image_optimization_info, + "config": { + "model": Config.MODEL, + "quality": Config.QUALITY, + "api_version": "gpt-image-1" + } + } + + json_path = self.config.get_output_path(base_name, 1, 'json') + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(params_dict, f, indent=2, ensure_ascii=False) + + # Clean up temporary files + for temp_file in [temp_image_path, temp_mask_path]: + if temp_file.exists(): + try: + temp_file.unlink() + except Exception: + pass + + # Prepare response + contents = [] + + text = f"✅ Image edited with mask successfully from files!\n" + text += f"📝 Input: {input_image_name}\n" + text += f"🎭 Mask: {mask_image_name}\n" + text += f"📁 Base name: {base_name}\n" + text += f"📐 Size: {response.image_size[0]}x{response.image_size[1]}\n" + text += f"⏱️ Processing time: 
{response.execution_time:.1f}s\n" + + # Add optimization info if image was optimized + if image_optimization_info and image_optimization_info["optimized"]: + text += f"\n🔄 Image was auto-optimized:\n" + text += f" Original: {image_optimization_info['original_size_mb']:.2f}MB\n" + text += f" Optimized: {image_optimization_info['final_size_mb']:.2f}MB\n" + text += f" Format: {image_optimization_info['format_used']}\n" + text += f" Saved as: {image_optimization_info['saved_as']}\n" + + if saved_path: + text += f"\n💾 Output: {Path(saved_path).name}" + if json_path: + text += f"\n📋 Parameters: {Path(json_path).name}" + + contents.append(TextContent(type="text", text=text)) + + # Add image preview + image_b64 = encode_image_base64(response.edited_image_data) + contents.append(ImageContent( + type="image", + data=image_b64, + mimeType="image/png" + )) + + # Reset seed for next session + self._reset_seed() + + return contents + + except Exception as e: + logger.error(f"Error in handle_edit_with_mask_from_file: {e}", exc_info=True) + return [TextContent( + type="text", + text=f"❌ File-based masked edit error: {str(e)}" + )] + + async def handle_move_temp_to_output(self, arguments: Dict[str, Any]) -> List[TextContent]: + """Handle move_temp_to_output tool call + + Args: + arguments: Tool arguments containing temp_file_name + + Returns: + List of content items + """ + try: + temp_file_name = arguments.get('temp_file_name') + output_file_name = arguments.get('output_file_name') + copy_only = arguments.get('copy_only', False) + + if not temp_file_name: + return [TextContent( + type="text", + text="❌ temp_file_name is required" + )] + + # Get temp file path + temp_file_path = self.config.base_path / 'temp' / temp_file_name + + # Check if temp file exists + if not Path(temp_file_path).exists(): + return [TextContent( + type="text", + text=f"❌ Temp file not found: {temp_file_name}" + )] + + # Generate output file name if not provided + if not output_file_name: + base_name = 
self.config.generate_base_name_simple() + file_ext = Path(temp_file_name).suffix[1:] or 'png' + output_file_name = f"{base_name}_001.{file_ext}" + + # Ensure output directory exists + self.config.ensure_output_directory() + + # Get output path + output_path = self.config.generated_images_path / output_file_name + + # Move or copy file + try: + if copy_only: + import shutil + shutil.copy2(temp_file_path, output_path) + operation = "copied" + else: + # Move file + import shutil + shutil.move(temp_file_path, output_path) + operation = "moved" + + # Verify operation was successful + if not output_path.exists(): + raise RuntimeError(f"File {operation} verification failed") + + logger.info(f"📁 File {operation}: {temp_file_name} -> {output_file_name}") + + # Get file size for reporting + file_size_mb = output_path.stat().st_size / (1024 * 1024) + + text = f"✅ File {operation} successfully!\n" + text += f"📁 From temp: {temp_file_name}\n" + text += f"📁 To output: {output_file_name}\n" + text += f"💾 Size: {file_size_mb:.2f}MB" + + return [TextContent(type="text", text=text)] + + except PermissionError as e: + return [TextContent( + type="text", + text=f"❌ Permission denied: {str(e)}" + )] + except shutil.Error as e: + return [TextContent( + type="text", + text=f"❌ File operation failed: {str(e)}" + )] + + except Exception as e: + logger.error(f"Error in handle_move_temp_to_output: {e}", exc_info=True) + return [TextContent( + type="text", + text=f"❌ File move error: {str(e)}" + )] diff --git a/src/server/mcp_server.py b/src/server/mcp_server.py new file mode 100644 index 0000000..4b1fa35 --- /dev/null +++ b/src/server/mcp_server.py @@ -0,0 +1,241 @@ +"""MCP Server implementation for GPTEdit""" + +import logging +import sys +from typing import Dict, Any, List, Union, Optional + +from mcp.server import Server +from mcp.types import ( + Tool, + TextContent, + ImageContent, + Prompt, + PromptMessage, + Resource +) + +from ..connector import Config +from .models import 
MCPToolDefinitions +from .handlers import ToolHandlers, sanitize_args_for_logging + +# Set up logger to use stderr to avoid interfering with stdout +logger = logging.getLogger(__name__) + + +class GPTEditMCPServer: + """GPTEdit MCP server class""" + + def __init__(self, config: Config): + """Initialize server""" + self.config = config + self.server = Server("gptedit") # Simplified server name + self.handlers = ToolHandlers(config) + + # Register handlers + self._register_handlers() + + logger.info("GPTEdit MCP Server initialized") + logger.info(f"Model: {Config.MODEL}") + logger.info(f"Max image size: {config.max_image_size_mb}MB") + logger.info(f"Server name: gptedit") + + def _register_handlers(self) -> None: + """Register MCP handlers""" + + @self.server.list_tools() + async def handle_list_tools() -> List[Tool]: + """Return list of available tools""" + logger.debug("list_tools called") + tools = MCPToolDefinitions.get_all_tools() + logger.info(f"Returning {len(tools)} tools") + return tools + + @self.server.call_tool() + async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[Union[TextContent, ImageContent]]: + """Handle tool calls""" + # Log tool call safely without exposing sensitive data + safe_args = sanitize_args_for_logging(arguments) + logger.info(f"Tool called: {name} with arguments: {safe_args}") + + try: + if name == "edit_image": + return await self.handlers.handle_edit_image(arguments) + elif name == "edit_with_mask": + return await self.handlers.handle_edit_with_mask(arguments) + elif name == "batch_edit": + return await self.handlers.handle_batch_edit(arguments) + elif name == "validate_image": + return await self.handlers.handle_validate_image(arguments) + elif name == "create_mask_from_alpha": + return await self.handlers.handle_create_mask_from_alpha(arguments) + elif name == "edit_image_from_file": + return await self.handlers.handle_edit_image_from_file(arguments) + elif name == "edit_with_mask_from_file": + return await 
self.handlers.handle_edit_with_mask_from_file(arguments) + elif name == "move_temp_to_output": + return await self.handlers.handle_move_temp_to_output(arguments) + else: + error_msg = f"Unknown tool: {name}" + logger.error(error_msg) + return [TextContent(type="text", text=f"❌ {error_msg}")] + + except Exception as e: + logger.error(f"Error handling tool {name}: {e}", exc_info=True) + return [TextContent( + type="text", + text=f"❌ Error processing {name}: {str(e)}" + )] + + @self.server.list_prompts() + async def handle_list_prompts() -> List[Prompt]: + """Return list of available prompts""" + logger.debug("list_prompts called") + prompts = [ + Prompt( + name="edit_image", + description="Edit an image with AI-powered instructions", + arguments=[ + { + "name": "image_path", + "description": "Path to the image to edit", + "required": True + }, + { + "name": "edit_description", + "description": "Description of how to edit the image", + "required": True + } + ] + ), + Prompt( + name="edit_with_mask_prompt", + description="Edit an image with a mask", + arguments=[ + { + "name": "image_path", + "description": "Path to the image to edit", + "required": True + }, + { + "name": "mask_path", + "description": "Path to the mask image", + "required": True + }, + { + "name": "edit_description", + "description": "Description of how to edit the masked areas", + "required": True + } + ] + ), + Prompt( + name="optimize_and_edit", + description="Automatically optimize and edit an image", + arguments=[ + { + "name": "image_path", + "description": "Path to the image to edit", + "required": True + }, + { + "name": "edit_description", + "description": "Description of how to edit the image", + "required": True + } + ] + ) + ] + logger.info(f"Returning {len(prompts)} prompts") + return prompts + + @self.server.get_prompt() + async def handle_get_prompt(name: str, arguments: Optional[Dict[str, Any]] = None) -> List[PromptMessage]: + """Get a specific prompt""" + logger.debug(f"get_prompt 
called for: {name}") + + if name == "edit_image": + if not arguments or "image_path" not in arguments or "edit_description" not in arguments: + return [PromptMessage( + role="user", + content=TextContent( + type="text", + text="Please provide an image_path and edit_description" + ) + )] + + return [PromptMessage( + role="user", + content=TextContent( + type="text", + text=f"Edit the image at '{arguments['image_path']}' with the following instructions: {arguments['edit_description']}" + ) + )] + + elif name == "edit_with_mask_prompt": + if not arguments or "image_path" not in arguments or "mask_path" not in arguments or "edit_description" not in arguments: + return [PromptMessage( + role="user", + content=TextContent( + type="text", + text="Please provide image_path, mask_path, and edit_description" + ) + )] + + return [PromptMessage( + role="user", + content=TextContent( + type="text", + text=f"Edit the image at '{arguments['image_path']}' using mask '{arguments['mask_path']}' with: {arguments['edit_description']}" + ) + )] + + elif name == "optimize_and_edit": + if not arguments or "image_path" not in arguments or "edit_description" not in arguments: + return [PromptMessage( + role="user", + content=TextContent( + type="text", + text="Please provide an image_path and edit_description" + ) + )] + + return [PromptMessage( + role="user", + content=TextContent( + type="text", + text=f"Optimize and edit the image at '{arguments['image_path']}' with: {arguments['edit_description']}" + ) + )] + + else: + return [PromptMessage( + role="user", + content=TextContent( + type="text", + text=f"Unknown prompt: {name}" + ) + )] + + @self.server.list_resources() + async def handle_list_resources() -> List[Resource]: + """Return list of available resources""" + logger.debug("list_resources called") + # GPTEdit doesn't expose file resources directly, return empty list + resources = [] + logger.info(f"Returning {len(resources)} resources") + return resources + + # Note: 
read_resource is not implemented as GPTEdit doesn't expose resources + # The MCP server will handle the "not implemented" response automatically + + # Log all registered handlers + logger.info("Registered MCP handlers:") + logger.info(" - list_tools") + logger.info(" - call_tool") + logger.info(" - list_prompts") + logger.info(" - get_prompt") + logger.info(" - list_resources") + + def get_server(self) -> Server: + """Return MCP server instance""" + return self.server diff --git a/src/server/models.py b/src/server/models.py new file mode 100644 index 0000000..dbe1a6f --- /dev/null +++ b/src/server/models.py @@ -0,0 +1,290 @@ +"""MCP Tool Models for GPTEdit""" + +from typing import List, Dict, Any +from mcp.types import Tool + + +class MCPToolDefinitions: + """MCP tool definitions for GPTEdit""" + + @staticmethod + def get_edit_image_tool() -> Tool: + """Get edit_image tool definition""" + return Tool( + name="edit_image", + description="Edit an existing image using AI-powered editing with OpenAI GPT-Image-1", + inputSchema={ + "type": "object", + "properties": { + "input_image_b64": { + "type": "string", + "description": "Base64 encoded input image data to edit (supports PNG, JPEG, WebP, etc.)" + }, + "prompt": { + "type": "string", + "description": "Description of how to edit the image" + }, + "background": { + "type": "string", + "enum": ["transparent", "opaque"], + "default": "transparent", + "description": "Background type for the edited image" + }, + "save_to_file": { + "type": "boolean", + "default": True, + "description": "Whether to save the edited image to a file" + } + }, + "required": ["input_image_b64", "prompt"] + } + ) + + @staticmethod + def get_edit_with_mask_tool() -> Tool: + """Get edit_with_mask tool definition""" + return Tool( + name="edit_with_mask", + description="Edit an image with a mask to specify which areas to modify", + inputSchema={ + "type": "object", + "properties": { + "input_image_b64": { + "type": "string", + "description": 
"Base64 encoded input image data to edit (supports PNG, JPEG, WebP, etc.)" + }, + "input_mask_b64": { + "type": "string", + "description": "Base64 encoded mask image (white areas will be edited)" + }, + "prompt": { + "type": "string", + "description": "Description of how to edit the masked areas" + }, + "background": { + "type": "string", + "enum": ["transparent", "opaque"], + "default": "transparent", + "description": "Background type for the edited image" + }, + "save_to_file": { + "type": "boolean", + "default": True, + "description": "Whether to save the edited image to a file" + } + }, + "required": ["input_image_b64", "input_mask_b64", "prompt"] + } + ) + + @staticmethod + def get_batch_edit_tool() -> Tool: + """Get batch_edit tool definition""" + return Tool( + name="batch_edit", + description="Edit multiple images with the same or different prompts (max 16 images)", + inputSchema={ + "type": "object", + "properties": { + "edits": { + "type": "array", + "description": "Array of edit configurations", + "items": { + "type": "object", + "properties": { + "input_image_name": { + "type": "string", + "description": "Name of the image file in temp directory" + }, + "prompt": { + "type": "string", + "description": "Edit prompt for this image" + }, + "mask_path": { + "type": "string", + "description": "Optional mask path" + }, + "background": { + "type": "string", + "enum": ["transparent", "opaque"], + "default": "transparent" + } + }, + "required": ["input_image_name", "prompt"] + }, + "maxItems": 16 + }, + "save_to_file": { + "type": "boolean", + "default": True, + "description": "Whether to save edited images to files" + } + }, + "required": ["edits"] + } + ) + + @staticmethod + def get_validate_image_tool() -> Tool: + """Get validate_image tool definition""" + return Tool( + name="validate_image", + description="Validate an image file for editing (check size, format, dimensions)", + inputSchema={ + "type": "object", + "properties": { + "image_path": { + "type": 
"string", + "description": "Path to the image file to validate" + } + }, + "required": ["image_path"] + } + ) + + @staticmethod + def get_create_mask_from_alpha_tool() -> Tool: + """Get create_mask_from_alpha tool definition""" + return Tool( + name="create_mask_from_alpha", + description="Create a mask image from the alpha channel of a PNG image", + inputSchema={ + "type": "object", + "properties": { + "image_path": { + "type": "string", + "description": "Path to the PNG image with alpha channel" + }, + "output_path": { + "type": "string", + "description": "Path where the mask image will be saved (optional, will auto-generate if not provided)" + } + }, + "required": ["image_path"] + } + ) + + @staticmethod + def get_edit_image_from_file_tool() -> Tool: + """Get edit_image_from_file tool definition (for file paths only)""" + return Tool( + name="edit_image_from_file", + description="Edit an image file by providing the file path (alternative to base64 input)", + inputSchema={ + "type": "object", + "properties": { + "input_image_name": { + "type": "string", + "description": "Name of the uploaded image file in temp directory" + }, + "prompt": { + "type": "string", + "description": "Description of how to edit the image" + }, + "background": { + "type": "string", + "enum": ["transparent", "opaque"], + "default": "transparent", + "description": "Background type for the edited image" + }, + "save_to_file": { + "type": "boolean", + "default": True, + "description": "Whether to save the edited image to a file" + }, + "auto_move_to_output": { + "type": "boolean", + "default": True, + "description": "Whether to automatically move uploaded input to output directory after editing" + } + }, + "required": ["input_image_name", "prompt"] + } + ) + + @staticmethod + def get_edit_with_mask_from_file_tool() -> Tool: + """Get edit_with_mask_from_file tool definition (for file paths only)""" + return Tool( + name="edit_with_mask_from_file", + description="Edit an image with mask by 
providing file paths (alternative to base64 input)", + inputSchema={ + "type": "object", + "properties": { + "input_image_name": { + "type": "string", + "description": "Name of the image file in temp directory" + }, + "mask_image_name": { + "type": "string", + "description": "Name of the mask image file in temp directory" + }, + "prompt": { + "type": "string", + "description": "Description of how to edit the masked areas" + }, + "background": { + "type": "string", + "enum": ["transparent", "opaque"], + "default": "transparent", + "description": "Background type for the edited image" + }, + "save_to_file": { + "type": "boolean", + "default": True, + "description": "Whether to save the edited image to a file" + }, + "auto_move_to_output": { + "type": "boolean", + "default": True, + "description": "Whether to automatically move uploaded files to output directory after editing" + } + }, + "required": ["input_image_name", "mask_image_name", "prompt"] + } + ) + + + @staticmethod + def get_move_temp_to_output_tool() -> Tool: + """Get move_temp_to_output tool definition""" + return Tool( + name="move_temp_to_output", + description="Move file from temp directory to output (generated_images) directory", + inputSchema={ + "type": "object", + "properties": { + "temp_file_name": { + "type": "string", + "description": "Name of the file in temp directory to move" + }, + "output_file_name": { + "type": "string", + "description": "Optional: Desired name for the output file (will auto-generate if not provided)" + }, + "copy_only": { + "type": "boolean", + "default": False, + "description": "If true, copy file instead of moving (keep original in temp)" + } + }, + "required": ["temp_file_name"] + } + ) + + @staticmethod + def get_all_tools() -> List[Tool]: + """Get all available tools""" + return [ + # Main editing tools + MCPToolDefinitions.get_edit_image_tool(), + MCPToolDefinitions.get_edit_with_mask_tool(), + MCPToolDefinitions.get_batch_edit_tool(), + # File-based tools 
(recommended) + MCPToolDefinitions.get_edit_image_from_file_tool(), + MCPToolDefinitions.get_edit_with_mask_from_file_tool(), + # Utility tools + MCPToolDefinitions.get_validate_image_tool(), + MCPToolDefinitions.get_create_mask_from_alpha_tool(), + MCPToolDefinitions.get_move_temp_to_output_tool() + ] diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..d3cc938 --- /dev/null +++ b/src/utils/__init__.py @@ -0,0 +1,34 @@ +"""Utils module""" + +from .image_utils import * +from .validation import * +from .token_utils import * + +__all__ = [ + # Image utils + 'validate_image_file', + 'convert_to_png', + 'get_image_dimensions', + 'get_image_dimensions_from_bytes', + 'ensure_transparent_background', + 'ensure_opaque_background', + 'save_image', + 'create_mask_from_alpha', + 'encode_image_base64', + 'decode_image_base64', + + # Validation utils + 'validate_edit_parameters', + 'validate_batch_parameters', + 'sanitize_prompt', + 'validate_api_response', + + # Token utils + 'estimate_token_count', + 'get_token_limit_for_size', + 'determine_optimal_size_for_aspect_ratio', + 'validate_prompt_length', + 'get_prompt_stats', + 'truncate_prompt_to_fit', + 'suggest_quality_for_prompt' +] diff --git a/src/utils/image_utils.py b/src/utils/image_utils.py new file mode 100644 index 0000000..587a2e6 --- /dev/null +++ b/src/utils/image_utils.py @@ -0,0 +1,705 @@ +"""Enhanced image utility functions with automatic WebP conversion and memory optimization""" + +import base64 +import io +import logging +import gc +from pathlib import Path +from typing import Tuple, Optional, Union, Iterator + +try: + from PIL import Image +except ImportError: + raise ImportError("Pillow is required. 
Install with: pip install pillow") + +logger = logging.getLogger(__name__) + + +def get_file_size_mb(file_path: Union[str, Path]) -> float: + """Get file size in MB""" + path = Path(file_path) + if path.exists(): + return path.stat().st_size / (1024 * 1024) + return 0.0 + + +def get_image_size_from_bytes(data: bytes) -> float: + """Get size of image data in MB""" + return len(data) / (1024 * 1024) + + +def validate_image_file(file_path: str, max_size_mb: int = 4) -> Tuple[bool, float, Optional[str]]: + """ + Validate an image file (updated to allow larger files for conversion) + + Args: + file_path: Path to image file + max_size_mb: Maximum file size in MB (soft limit - will convert if exceeded) + + Returns: + tuple: (is_valid, size_mb, error_message) + """ + try: + path = Path(file_path) + + # Check if file exists + if not path.exists(): + return False, 0, f"File not found: {file_path}" + + # Check file size + size_mb = get_file_size_mb(path) + + # Check if it's a valid image + try: + with Image.open(file_path) as img: + # Verify it's a supported format + if img.format not in ['PNG', 'JPEG', 'JPG', 'GIF', 'BMP', 'WEBP', 'TIFF']: + return False, size_mb, f"Unsupported format: {img.format}" + + # Check dimensions + width, height = img.size + if width > 4096 or height > 4096: + return False, size_mb, f"Image dimensions too large: {width}x{height} (max: 4096x4096)" + + except Exception as e: + return False, size_mb, f"Invalid image file: {str(e)}" + + # Note: We don't fail on size anymore, just return the info + return True, size_mb, None if size_mb <= max_size_mb else f"File size {size_mb:.2f}MB exceeds {max_size_mb}MB limit" + + except Exception as e: + logger.error(f"Error validating image: {e}") + return False, 0, str(e) + + +def optimize_image_to_size_limit( + image_path: str, + max_size_mb: float = 4.0, + format: str = 'WEBP', + initial_quality: int = 95 +) -> bytes: + """ + Optimize image to fit within size limit using WebP or other format + + Args: + 
image_path: Path to input image + max_size_mb: Maximum size in MB + format: Output format (WEBP recommended for best compression) + initial_quality: Starting quality for optimization + + Returns: + bytes: Optimized image data + """ + max_size_bytes = max_size_mb * 1024 * 1024 + + try: + with Image.open(image_path) as img: + # Convert to RGB if needed (WebP doesn't support all modes) + if format == 'WEBP': + if img.mode in ('RGBA', 'LA', 'PA'): + # Keep alpha channel for WebP + if img.mode != 'RGBA': + img = img.convert('RGBA') + elif img.mode not in ('RGB', 'L'): + img = img.convert('RGB') + + # Try different quality levels (more aggressive) + quality = initial_quality + min_quality = 50 # Lowered minimum quality for better compression + + best_data = None + best_quality = 0 + + # Binary search for optimal quality + low_quality = min_quality + high_quality = initial_quality + + while low_quality <= high_quality: + mid_quality = (low_quality + high_quality) // 2 + + buffer = io.BytesIO() + save_kwargs = { + 'format': format, + 'quality': mid_quality, + 'optimize': True + } + + # WebP specific options for better compression + if format == 'WEBP': + save_kwargs['method'] = 6 # Slowest/best compression + save_kwargs['lossless'] = False # Use lossy for better compression + + img.save(buffer, **save_kwargs) + data = buffer.getvalue() + + if len(data) <= max_size_bytes: + # This quality works, try higher + best_data = data + best_quality = mid_quality + low_quality = mid_quality + 1 + else: + # Too large, try lower quality + high_quality = mid_quality - 1 + + if best_data: + size_mb = len(best_data) / (1024 * 1024) + logger.info(f"✅ Image optimized: {size_mb:.2f}MB at quality {best_quality} using {format}") + return best_data + + # If still too large, try more aggressive compression + logger.warning("Standard optimization failed, trying aggressive compression...") + + # Resize if necessary (more aggressive scaling) + scale = 0.8 # Start with smaller scale + while scale > 
0.3: # Allow more aggressive resizing + new_width = int(img.width * scale) + new_height = int(img.height * scale) + + resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS) + + buffer = io.BytesIO() + resized.save(buffer, format=format, quality=min_quality, optimize=True, method=6) + data = buffer.getvalue() + + if len(data) <= max_size_bytes: + size_mb = len(data) / (1024 * 1024) + logger.warning(f"⚠️ Image resized to {new_width}x{new_height} ({scale*100:.0f}%) to fit size limit: {size_mb:.2f}MB") + return data + + scale -= 0.1 + + raise ValueError(f"Cannot optimize image to under {max_size_mb}MB even with resizing") + + except Exception as e: + logger.error(f"Error optimizing image: {e}") + raise + + +def convert_to_png_with_size_limit( + file_path: str, + max_size_mb: float = 4.0, + prefer_webp: bool = True +) -> Tuple[bytes, str]: + """ + Convert image to PNG or WebP format with size limit + + Args: + file_path: Path to image file + max_size_mb: Maximum size in MB + prefer_webp: If True and size exceeds limit, use WebP instead of PNG + + Returns: + tuple: (image_data, format_used) + """ + try: + # First check current file size + current_size_mb = get_file_size_mb(file_path) + + with Image.open(file_path) as img: + # Store original dimensions for logging + original_dims = img.size + + # Convert to RGBA if not already + if img.mode != 'RGBA': + if img.mode == 'P': # Palette mode + img = img.convert('RGBA') + elif img.mode in ('L', 'LA'): # Grayscale + img = img.convert('RGBA') + elif img.mode == 'RGB': + # Add alpha channel + img = img.convert('RGBA') + else: + img = img.convert('RGBA') + + # Try PNG first + buffer = io.BytesIO() + img.save(buffer, format='PNG', optimize=True) + png_data = buffer.getvalue() + png_size_mb = len(png_data) / (1024 * 1024) + + if png_size_mb <= max_size_mb: + logger.info(f"✅ Image converted to PNG: {png_size_mb:.2f}MB") + return png_data, 'PNG' + + # PNG is too large, need to optimize + logger.info(f"PNG too large 
({png_size_mb:.2f}MB), optimizing...") + + if prefer_webp: + # Try WebP for better compression + logger.info("Converting to WebP for better compression...") + webp_data = optimize_image_to_size_limit( + file_path, + max_size_mb=max_size_mb, + format='WEBP', + initial_quality=95 + ) + return webp_data, 'WEBP' + else: + # Try to optimize PNG (limited options) + buffer = io.BytesIO() + + # Try reducing color depth or resizing + if img.mode == 'RGBA': + # Try converting to palette mode for smaller size + img_palette = img.convert('P', palette=Image.ADAPTIVE, colors=256) + buffer = io.BytesIO() + img_palette.save(buffer, format='PNG', optimize=True) + data = buffer.getvalue() + + if len(data) <= max_size_mb * 1024 * 1024: + size_mb = len(data) / (1024 * 1024) + logger.info(f"✅ PNG optimized with palette: {size_mb:.2f}MB") + return data, 'PNG' + + # If still too large, resize + scale = 0.9 + while scale > 0.5: + new_width = int(img.width * scale) + new_height = int(img.height * scale) + + resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS) + buffer = io.BytesIO() + resized.save(buffer, format='PNG', optimize=True) + data = buffer.getvalue() + + if len(data) <= max_size_mb * 1024 * 1024: + size_mb = len(data) / (1024 * 1024) + logger.warning(f"⚠️ PNG resized to {new_width}x{new_height} to fit: {size_mb:.2f}MB") + return data, 'PNG' + + scale -= 0.1 + + raise ValueError(f"Cannot optimize PNG to under {max_size_mb}MB") + + except Exception as e: + logger.error(f"Error converting image: {e}") + raise + + +def convert_to_png(file_path: str) -> bytes: + """ + Legacy function - Convert an image to PNG format + Now uses the size-limited version internally + + Args: + file_path: Path to image file + + Returns: + bytes: PNG image data + """ + data, _ = convert_to_png_with_size_limit(file_path, max_size_mb=4.0) + return data + + +def get_image_dimensions(file_path: str) -> Tuple[int, int]: + """ + Get image dimensions + + Args: + file_path: Path to image file + 
+ Returns: + tuple: (width, height) + """ + try: + with Image.open(file_path) as img: + return img.size + except Exception as e: + logger.error(f"Error getting dimensions: {e}") + return (0, 0) + + +def get_image_dimensions_from_bytes(image_data: bytes) -> Tuple[int, int]: + """ + Get image dimensions from bytes + + Args: + image_data: Image data as bytes + + Returns: + tuple: (width, height) + """ + try: + with Image.open(io.BytesIO(image_data)) as img: + return img.size + except Exception as e: + logger.error(f"Error getting dimensions from bytes: {e}") + return (0, 0) + + +def ensure_transparent_background(image_data: bytes) -> bytes: + """ + Ensure image has transparent background (for game characters) + + Args: + image_data: PNG image data + + Returns: + bytes: PNG with transparent background + """ + try: + with Image.open(io.BytesIO(image_data)) as img: + # Ensure RGBA mode + if img.mode != 'RGBA': + img = img.convert('RGBA') + + # Note: Actual background removal would require more sophisticated + # techniques like rembg or similar. This is a placeholder. 
+ + buffer = io.BytesIO() + img.save(buffer, format='PNG', optimize=True) + return buffer.getvalue() + + except Exception as e: + logger.error(f"Error ensuring transparent background: {e}") + return image_data + + +def ensure_opaque_background(image_data: bytes, bg_color: Tuple[int, int, int] = (255, 255, 255)) -> bytes: + """ + Ensure image has opaque background (for game backgrounds) + + Args: + image_data: PNG image data + bg_color: Background color RGB tuple + + Returns: + bytes: PNG with opaque background + """ + try: + with Image.open(io.BytesIO(image_data)) as img: + # Convert to RGBA first + if img.mode != 'RGBA': + img = img.convert('RGBA') + + # Create new image with background + background = Image.new('RGBA', img.size, bg_color + (255,)) + + # Composite the image over the background + background.paste(img, (0, 0), img) + + # Convert to RGB (remove alpha) + final = background.convert('RGB') + + buffer = io.BytesIO() + final.save(buffer, format='PNG', optimize=True) + return buffer.getvalue() + + except Exception as e: + logger.error(f"Error ensuring opaque background: {e}") + return image_data + + +def save_image(image_data: bytes, output_path: str) -> bool: + """ + Save image data to file with enhanced directory handling + + Args: + image_data: Image data as bytes + output_path: Output file path + + Returns: + bool: Success status + """ + try: + path = Path(output_path) + + # Ensure parent directory exists with proper error handling + try: + path.parent.mkdir(parents=True, exist_ok=True) + + # Verify directory was created successfully + if not path.parent.exists(): + logger.error(f"Failed to create directory: {path.parent}") + return False + + # Check if parent is actually a directory + if not path.parent.is_dir(): + logger.error(f"Parent path exists but is not a directory: {path.parent}") + return False + + except PermissionError as e: + logger.error(f"Permission denied creating directory {path.parent}: {e}") + return False + except Exception as e: + 
logger.error(f"Failed to create directory {path.parent}: {e}") + return False + + # Save the image file + try: + with open(path, 'wb') as f: + f.write(image_data) + + # Verify file was written successfully + if not path.exists() or path.stat().st_size != len(image_data): + logger.error(f"File save verification failed: {path}") + return False + + logger.info(f"Image saved: {path} ({len(image_data):,} bytes)") + return True + + except PermissionError as e: + logger.error(f"Permission denied writing file {path}: {e}") + return False + except OSError as e: + logger.error(f"OS error writing file {path}: {e}") + return False + + except Exception as e: + logger.error(f"Unexpected error saving image to {output_path}: {e}") + return False + + +def create_mask_from_alpha(image_path: str) -> Optional[bytes]: + """ + Create a mask from image alpha channel + + Args: + image_path: Path to image with alpha channel + + Returns: + bytes: Mask image data (or None if no alpha) + """ + try: + with Image.open(image_path) as img: + if img.mode != 'RGBA': + logger.warning("Image has no alpha channel for mask creation") + return None + + # Extract alpha channel + alpha = img.split()[-1] + + # Convert to binary mask (black/white) + mask = Image.new('L', img.size, 0) + mask.paste(alpha, (0, 0)) + + # Threshold to make it binary + threshold = 128 + mask = mask.point(lambda p: 255 if p > threshold else 0) + + buffer = io.BytesIO() + mask.save(buffer, format='PNG') + return buffer.getvalue() + + except Exception as e: + logger.error(f"Error creating mask: {e}") + return None + + +def encode_image_base64(image_data: bytes) -> str: + """ + Encode image data to base64 string + + Args: + image_data: Image bytes + + Returns: + str: Base64 encoded string + """ + return base64.b64encode(image_data).decode('utf-8') + + +def decode_image_base64(base64_str: str) -> bytes: + """ + Decode base64 string to image data + Supports both raw base64 and data URL formats + + Args: + base64_str: Base64 encoded 
string (with or without data URL prefix) + + Returns: + bytes: Image data + """ + # Handle data URL format (e.g., "data:image/jpeg;base64,...") + if base64_str.startswith('data:'): + # Find the comma that separates the header from data + comma_index = base64_str.find(',') + if comma_index != -1: + base64_str = base64_str[comma_index + 1:] + else: + raise ValueError("Invalid data URL format: no comma found") + + # Remove any whitespace/newlines + base64_str = base64_str.strip().replace('\n', '').replace('\r', '') + + try: + return base64.b64decode(base64_str) + except Exception as e: + raise ValueError(f"Failed to decode base64 data: {e}") + + +def encode_image_chunked(image_data: bytes, chunk_size: int = 8192) -> str: + """ + Encode image data to base64 in chunks to reduce memory usage + + Args: + image_data: Image bytes + chunk_size: Size of chunks for processing + + Returns: + str: Base64 encoded string + """ + if len(image_data) < chunk_size * 2: + # For small images, use regular encoding + return base64.b64encode(image_data).decode('utf-8') + + # For large images, use chunked encoding + result = [] + for i in range(0, len(image_data), chunk_size): + chunk = image_data[i:i + chunk_size] + result.append(base64.b64encode(chunk).decode('utf-8')) + + return ''.join(result) + + +def process_image_stream(image_path: str, max_size_mb: float = 4.0) -> io.BytesIO: + """ + Process image as a stream to minimize memory usage + + Args: + image_path: Path to input image + max_size_mb: Maximum size in MB + + Returns: + io.BytesIO: Processed image stream + """ + try: + with Image.open(image_path) as img: + # Check if optimization is needed + current_size = Path(image_path).stat().st_size / (1024 * 1024) + + if current_size <= max_size_mb: + # No optimization needed, return as stream + buffer = io.BytesIO() + img.save(buffer, format='PNG', optimize=True) + buffer.seek(0) + return buffer + + # Optimization needed + return _optimize_to_stream(img, max_size_mb) + + except Exception 
as e: + logger.error(f"Error processing image stream: {e}") + raise + + +def _optimize_to_stream(img: Image.Image, max_size_mb: float) -> io.BytesIO: + """ + Optimize image and return as stream + + Args: + img: PIL Image object + max_size_mb: Maximum size in MB + + Returns: + io.BytesIO: Optimized image stream + """ + max_size_bytes = max_size_mb * 1024 * 1024 + + # Try WebP first for better compression + if img.mode in ('RGBA', 'LA', 'PA'): + if img.mode != 'RGBA': + img = img.convert('RGBA') + elif img.mode not in ('RGB', 'L'): + img = img.convert('RGB') + + # Binary search for optimal quality + low_quality = 60 + high_quality = 95 + best_buffer = None + + while low_quality <= high_quality: + mid_quality = (low_quality + high_quality) // 2 + + buffer = io.BytesIO() + save_kwargs = { + 'format': 'WEBP', + 'quality': mid_quality, + 'optimize': True, + 'method': 6 + } + + img.save(buffer, **save_kwargs) + + if buffer.tell() <= max_size_bytes: + # This quality works, try higher + best_buffer = buffer + low_quality = mid_quality + 1 + else: + # Too large, try lower quality + high_quality = mid_quality - 1 + + if best_buffer: + best_buffer.seek(0) + return best_buffer + + # If still too large, try resizing + logger.warning("Quality optimization failed, trying resize...") + return _resize_to_stream(img, max_size_mb) + + +def _resize_to_stream(img: Image.Image, max_size_mb: float) -> io.BytesIO: + """ + Resize image to fit size limit and return as stream + + Args: + img: PIL Image object + max_size_mb: Maximum size in MB + + Returns: + io.BytesIO: Resized image stream + """ + max_size_bytes = max_size_mb * 1024 * 1024 + scale = 1.0 + + while scale > 0.5: + new_width = int(img.width * scale) + new_height = int(img.height * scale) + + resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS) + + buffer = io.BytesIO() + resized.save(buffer, format='WEBP', quality=80, optimize=True, method=6) + + if buffer.tell() <= max_size_bytes: + buffer.seek(0) + 
logger.info(f"Image resized to {new_width}x{new_height} ({scale*100:.0f}%) to fit size limit") + return buffer + + scale -= 0.1 + + raise ValueError(f"Cannot optimize image to under {max_size_mb}MB even with aggressive resizing") + + +def memory_efficient_batch_convert(image_paths: list, max_size_mb: float = 4.0) -> Iterator[Tuple[str, bytes]]: + """ + Convert multiple images efficiently with memory management + + Args: + image_paths: List of image file paths + max_size_mb: Maximum size per image in MB + + Yields: + Tuple[str, bytes]: (image_path, converted_image_data) + """ + for i, image_path in enumerate(image_paths): + try: + logger.info(f"Processing image {i+1}/{len(image_paths)}: {Path(image_path).name}") + + # Process image + stream = process_image_stream(image_path, max_size_mb) + image_data = stream.getvalue() + + yield image_path, image_data + + # Cleanup + stream.close() + del stream, image_data + + # Force garbage collection every few images + if (i + 1) % 3 == 0: + gc.collect() + + except Exception as e: + logger.error(f"Error processing {image_path}: {e}") + yield image_path, b'' # Return empty bytes on error diff --git a/src/utils/token_utils.py b/src/utils/token_utils.py new file mode 100644 index 0000000..912cae9 --- /dev/null +++ b/src/utils/token_utils.py @@ -0,0 +1,237 @@ +"""Token utilities for GPTEdit""" + +import logging +from typing import Tuple, Dict + +logger = logging.getLogger(__name__) + +# Token limits for GPT-Image-1 based on quality and dimensions +TOKEN_LIMITS = { + "low": { + "256x256": 272, # Square small (estimated) + "512x512": 272, # Square medium (estimated) + "1024x1024": 272, # Square + "1024x1536": 408, # Portrait + "1536x1024": 400, # Landscape + }, + "medium": { + "256x256": 1056, # Square small (estimated) + "512x512": 1056, # Square medium (estimated) + "1024x1024": 1056, # Square + "1024x1536": 1584, # Portrait + "1536x1024": 1568, # Landscape + }, + "high": { + "256x256": 4160, # Square small (using square limit) + 
"512x512": 4160, # Square medium (using square limit) + "1024x1024": 4160, # Square + "1024x1536": 6240, # Portrait + "1536x1024": 6208, # Landscape + } +} + +# Quality setting (fixed as per requirements) +QUALITY = "high" + + +def estimate_token_count(text: str) -> int: + """ + Estimate token count for a text string + + Simple approximation: + - Average English word ≈ 1.3 tokens + - Average character ≈ 0.25 tokens + + Args: + text: Input text + + Returns: + int: Estimated token count + """ + # Remove extra whitespace + text = ' '.join(text.split()) + + # Estimate based on characters (more consistent) + # GPT models typically use ~4 characters per token on average + char_estimate = len(text) / 4 + + # Also calculate word-based estimate + word_count = len(text.split()) + word_estimate = word_count * 1.3 + + # Use the higher estimate to be safe + estimate = max(char_estimate, word_estimate) + + return int(estimate) + + +def get_token_limit_for_size(size: str, quality: str = QUALITY) -> int: + """ + Get token limit for a specific size and quality + + Args: + size: Image size (e.g., "1024x1024") + quality: Quality setting (low/medium/high) + + Returns: + int: Token limit for the configuration + """ + quality = quality.lower() + + if quality not in TOKEN_LIMITS: + logger.warning(f"Unknown quality '{quality}', defaulting to 'high'") + quality = "high" + + if size in TOKEN_LIMITS[quality]: + return TOKEN_LIMITS[quality][size] + + # Default to square limit if size not found + logger.warning(f"Unknown size '{size}', defaulting to square limit") + return TOKEN_LIMITS[quality]["1024x1024"] + + +def determine_optimal_size_for_aspect_ratio(width: int, height: int) -> Tuple[str, str]: + """ + Determine optimal size based on aspect ratio + + Args: + width: Image width + height: Image height + + Returns: + tuple: (size_string, aspect_type) + """ + aspect_ratio = width / height if height > 0 else 1.0 + + # For smaller images, use smaller sizes + max_dim = max(width, height) + + if 
max_dim <= 256: + return "256x256", "square" + elif max_dim <= 512: + return "512x512", "square" + + # For larger images, consider aspect ratio + if aspect_ratio > 1.3: # Landscape + return "1536x1024", "landscape" + elif aspect_ratio < 0.77: # Portrait + return "1024x1536", "portrait" + else: # Square-ish + return "1024x1024", "square" + + +def validate_prompt_length(prompt: str, size: str = "1024x1024", quality: str = QUALITY) -> Tuple[bool, int, str]: + """ + Validate if prompt length is within token limits + + Args: + prompt: The prompt text + size: Target image size + quality: Quality setting + + Returns: + tuple: (is_valid, token_count, error_message) + """ + token_count = estimate_token_count(prompt) + token_limit = get_token_limit_for_size(size, quality) + + if token_count > token_limit: + error_msg = f"Prompt too long: ~{token_count} tokens (limit: {token_limit} for {size} at {quality} quality)" + return False, token_count, error_msg + + # Warning if close to limit (90%) + if token_count > token_limit * 0.9: + logger.warning(f"Prompt approaching token limit: {token_count}/{token_limit} tokens") + + return True, token_count, "" + + +def get_prompt_stats(prompt: str, size: str = "1024x1024", quality: str = QUALITY) -> Dict[str, any]: + """ + Get detailed prompt statistics + + Args: + prompt: The prompt text + size: Target image size + quality: Quality setting + + Returns: + dict: Statistics including token count, limit, and usage percentage + """ + token_count = estimate_token_count(prompt) + token_limit = get_token_limit_for_size(size, quality) + + return { + "estimated_tokens": token_count, + "token_limit": token_limit, + "usage_percentage": round((token_count / token_limit) * 100, 1), + "remaining_tokens": token_limit - token_count, + "quality": quality, + "size": size, + "is_valid": token_count <= token_limit + } + + +def truncate_prompt_to_fit(prompt: str, size: str = "1024x1024", quality: str = QUALITY, buffer: float = 0.95) -> str: + """ + Truncate 
prompt to fit within token limits + + Args: + prompt: The prompt text + size: Target image size + quality: Quality setting + buffer: Use this percentage of max tokens (default 95%) + + Returns: + str: Truncated prompt that fits within limits + """ + token_limit = get_token_limit_for_size(size, quality) + target_tokens = int(token_limit * buffer) + + current_tokens = estimate_token_count(prompt) + + if current_tokens <= target_tokens: + return prompt + + # Binary search for optimal length + words = prompt.split() + left, right = 0, len(words) + best_fit = "" + + while left <= right: + mid = (left + right) // 2 + test_prompt = ' '.join(words[:mid]) + test_tokens = estimate_token_count(test_prompt) + + if test_tokens <= target_tokens: + best_fit = test_prompt + left = mid + 1 + else: + right = mid - 1 + + if best_fit and best_fit != prompt: + logger.warning(f"Prompt truncated from ~{current_tokens} to ~{estimate_token_count(best_fit)} tokens") + + return best_fit if best_fit else ' '.join(words[:10]) # Fallback to first 10 words + + +def suggest_quality_for_prompt(prompt: str, size: str = "1024x1024") -> str: + """ + Suggest appropriate quality level based on prompt length + + Args: + prompt: The prompt text + size: Target image size + + Returns: + str: Suggested quality level + """ + token_count = estimate_token_count(prompt) + + # Check each quality level + for quality in ["low", "medium", "high"]: + limit = get_token_limit_for_size(size, quality) + if token_count <= limit * 0.9: # Leave 10% buffer + return quality + + return "high" # Default to high even if over limit (will need truncation) diff --git a/src/utils/validation.py b/src/utils/validation.py new file mode 100644 index 0000000..ba9aac4 --- /dev/null +++ b/src/utils/validation.py @@ -0,0 +1,127 @@ +"""Validation utilities for GPTEdit""" + +import logging +from pathlib import Path +from typing import List, Dict, Any, Optional + +logger = logging.getLogger(__name__) + + +def validate_edit_parameters(params: 
Dict[str, Any]) -> tuple[bool, Optional[str]]: + """ + Validate image edit parameters + + Args: + params: Dictionary of parameters + + Returns: + tuple: (is_valid, error_message) + """ + required_fields = ['image_path', 'prompt'] + + # Check required fields + for field in required_fields: + if field not in params or not params[field]: + return False, f"Missing required field: {field}" + + # Validate image path + image_path = Path(params['image_path']) + if not image_path.exists(): + return False, f"Image file not found: {params['image_path']}" + + # Validate mask path if provided + if 'mask_path' in params and params['mask_path']: + mask_path = Path(params['mask_path']) + if not mask_path.exists(): + return False, f"Mask file not found: {params['mask_path']}" + + # Validate background option + if 'background' in params: + if params['background'] not in ['transparent', 'opaque']: + return False, f"Invalid background option: {params['background']}" + + # Validate size if provided + if 'size' in params and params['size']: + valid_sizes = ['256x256', '512x512', '1024x1024'] + if params['size'] not in valid_sizes: + return False, f"Invalid size: {params['size']}. 
Must be one of {valid_sizes}" + + return True, None + + +def validate_batch_parameters(batch_params: List[Dict[str, Any]]) -> tuple[bool, Optional[str]]: + """ + Validate batch edit parameters + + Args: + batch_params: List of parameter dictionaries + + Returns: + tuple: (is_valid, error_message) + """ + if not batch_params: + return False, "No images provided for batch editing" + + if len(batch_params) > 16: + return False, f"Too many images: {len(batch_params)} (maximum 16)" + + # Validate each item + for i, params in enumerate(batch_params): + is_valid, error_msg = validate_edit_parameters(params) + if not is_valid: + return False, f"Item {i+1}: {error_msg}" + + return True, None + + +def sanitize_prompt(prompt: str) -> str: + """ + Sanitize user prompt + + Args: + prompt: User prompt + + Returns: + str: Sanitized prompt + """ + # Remove excessive whitespace + prompt = ' '.join(prompt.split()) + + # Limit length (OpenAI has limits) + max_length = 1000 + if len(prompt) > max_length: + prompt = prompt[:max_length] + logger.warning(f"Prompt truncated to {max_length} characters") + + return prompt + + +def validate_api_response(response: Any) -> bool: + """ + Validate API response structure + + Args: + response: API response object + + Returns: + bool: True if valid + """ + try: + if not response: + return False + + if not hasattr(response, 'data'): + return False + + if not response.data: + return False + + # Check for base64 data + if not hasattr(response.data[0], 'b64_json'): + return False + + return True + + except Exception as e: + logger.error(f"Error validating response: {e}") + return False diff --git a/temp_delete_marker.txt b/temp_delete_marker.txt new file mode 100644 index 0000000..f425ba5 --- /dev/null +++ b/temp_delete_marker.txt @@ -0,0 +1 @@ +# This file was removed during cleanup diff --git a/test_api_key.py b/test_api_key.py new file mode 100644 index 0000000..352bcfa --- /dev/null +++ b/test_api_key.py @@ -0,0 +1 @@ +# REMOVED: Test file cleaned 
up during project organization diff --git a/test_size_optimization.py b/test_size_optimization.py new file mode 100644 index 0000000..352bcfa --- /dev/null +++ b/test_size_optimization.py @@ -0,0 +1 @@ +# REMOVED: Test file cleaned up during project organization diff --git a/test_timeout_fix.py b/test_timeout_fix.py new file mode 100644 index 0000000..3fa5c99 --- /dev/null +++ b/test_timeout_fix.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +"""Test script for timeout fixes""" + +import asyncio +import logging +import sys +from pathlib import Path + +# Add src to path +sys.path.insert(0, str(Path(__file__).parent / 'src')) + +from src.connector import Config, OpenAIEditClient, ImageEditRequest + +# Setup logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) + +logger = logging.getLogger(__name__) + +async def test_image_edit(): + """Test image editing with new timeout settings""" + try: + # Initialize config + config = Config() + logger.info(f"Timeout setting: {config.default_timeout}s") + logger.info(f"Max image size: {config.max_image_size_mb}MB") + + # Find test image + input_path = config.input_path / "imagen4.png" + if not input_path.exists(): + logger.error(f"Test image not found: {input_path}") + return False + + # Check original size + original_size = input_path.stat().st_size / (1024 * 1024) + logger.info(f"Original image size: {original_size:.2f}MB") + + # Create client + client = OpenAIEditClient(config) + + # Create edit request + request = ImageEditRequest( + image_path=str(input_path), + prompt="Add magical sparkles around the fairy", + background="transparent", + auto_optimize=True + ) + + logger.info("Starting edit request...") + start_time = asyncio.get_event_loop().time() + + # Process edit + response = await client.edit_image(request) + + end_time = asyncio.get_event_loop().time() + total_time = end_time - start_time + + if response.success: + logger.info(f"✅ Edit successful in 
{total_time:.1f}s") + logger.info(f"Output size: {response.image_size}") + + # Save result + output_path = config.generated_images_path / "test_timeout_fix.png" + if output_path.parent.exists(): + with open(output_path, 'wb') as f: + f.write(response.edited_image_data) + logger.info(f"Saved to: {output_path}") + + return True + else: + logger.error(f"❌ Edit failed: {response.error_message}") + logger.error(f"Error type: {response.error_type}") + return False + + except Exception as e: + logger.error(f"Test failed with exception: {e}") + return False + +if __name__ == "__main__": + # Run test + success = asyncio.run(test_image_edit()) + sys.exit(0 if success else 1) diff --git a/test_verification.py b/test_verification.py new file mode 100644 index 0000000..352bcfa --- /dev/null +++ b/test_verification.py @@ -0,0 +1 @@ +# REMOVED: Test file cleaned up during project organization diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..da57b53 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests module init""" diff --git a/tests/image_utils_backup.py b/tests/image_utils_backup.py new file mode 100644 index 0000000..6aa482e --- /dev/null +++ b/tests/image_utils_backup.py @@ -0,0 +1 @@ +# REMOVED: Backup file cleaned up during project organization diff --git a/tests/test_b64.py b/tests/test_b64.py new file mode 100644 index 0000000..1160ae7 --- /dev/null +++ b/tests/test_b64.py @@ -0,0 +1,48 @@ +"""Test script for base64 image input""" + +import base64 +import asyncio +from pathlib import Path +from src.server.handlers import ToolHandlers +from src.connector import Config + +async def test_b64_edit(): + """Test editing with base64 input""" + + # Initialize config and handlers + config = Config() + handlers = ToolHandlers(config) + + # Read test image and convert to base64 + test_image_path = Path("inputs/test.png") # You need to put a test image here + + if not test_image_path.exists(): + print(f"Please place a test image at 
{test_image_path}") + return + + with open(test_image_path, "rb") as f: + image_data = f.read() + + image_b64 = base64.b64encode(image_data).decode('utf-8') + + # Test edit_image with base64 + print("Testing edit_image with base64 input...") + + arguments = { + "input_image_b64": image_b64, + "prompt": "Make the image more colorful and vibrant", + "background": "transparent", + "save_to_file": True + } + + result = await handlers.handle_edit_image(arguments) + + # Print result + for content in result: + if hasattr(content, 'text'): + print(content.text) + + print("\nTest completed!") + +if __name__ == "__main__": + asyncio.run(test_b64_edit()) diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..69f971f --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python +""" +Configuration Test Script for GPTEdit +Tests that all configuration options are working correctly +""" + +import sys +import os +from pathlib import Path +from datetime import datetime + +# Add project root to path +project_root = Path(__file__).parent +sys.path.insert(0, str(project_root)) + + +def test_configuration(): + """Test configuration loading and validation""" + print("=" * 60) + print("GPTEdit Configuration Test") + print("=" * 60) + + try: + from src.connector.config import Config + + # Load configuration + print("\n1. Loading configuration...") + config = Config() + print("✓ Configuration loaded") + + # Display configuration + print("\n2. Current Configuration:") + print("-" * 40) + print(config) + print("-" * 40) + + # Validate configuration + print("\n3. Validating configuration...") + if config.validate(): + print("✓ Configuration is valid") + else: + print("✗ Configuration validation failed") + return False + + # Test directories + print("\n4. 
Testing directories:") + + # Output directory + if config.edited_images_path.exists(): + print(f"✓ Output directory exists: {config.edited_images_path}") + + # Test write permission + test_file = config.edited_images_path / "test_write.tmp" + try: + test_file.write_text("test") + test_file.unlink() + print(" ✓ Write permission confirmed") + except Exception as e: + print(f" ✗ Cannot write to output directory: {e}") + else: + print(f"✗ Output directory not found: {config.edited_images_path}") + + # Temp directory + if config.temp_path.exists(): + print(f"✓ Temp directory exists: {config.temp_path}") + else: + print(f"✗ Temp directory not found: {config.temp_path}") + + # Test file naming + print("\n5. Testing file naming:") + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + test_filename = config.output_filename_format.format( + prefix=config.output_filename_prefix, + timestamp=timestamp + ) + print(f" Sample filename: {test_filename}") + + # Test API key + print("\n6. API Key Status:") + if config.api_key: + if config.api_key.startswith('sk-'): + print(f" ✓ API key configured (ends with ...{config.api_key[-4:]})") + else: + print(" ⚠ API key doesn't start with 'sk-' - might be invalid") + else: + print(" ✗ API key not set") + print(" Please set OPENAI_API_KEY in .env file or environment") + + # Test feature flags + print("\n7. Feature Flags:") + print(f" Auto-optimize: {config.enable_auto_optimize}") + print(f" Auto-mask: {config.enable_auto_mask}") + print(f" Save originals: {config.save_originals}") + + # Test image size limits + print("\n8. 
Image Processing Settings:") + print(f" Max image size: {config.max_image_size_mb} MB") + print(f" Max bytes: {config.get_max_image_size_bytes():,}") + + print("\n" + "=" * 60) + print("✅ Configuration test completed successfully!") + print("=" * 60) + + return True + + except ImportError as e: + print(f"\n✗ Import error: {e}") + print(" Make sure you're in the gptedit directory") + return False + except Exception as e: + print(f"\n✗ Unexpected error: {e}") + import traceback + traceback.print_exc() + return False + + +def test_env_file(): + """Check .env file status""" + print("\n" + "=" * 60) + print("Environment File Check") + print("=" * 60) + + env_path = Path(".env") + env_example_path = Path(".env.example") + + if env_path.exists(): + print("✓ .env file exists") + + # Check if it has API key + with open(env_path, 'r') as f: + content = f.read() + if 'OPENAI_API_KEY=' in content: + if 'your-api-key-here' in content: + print("⚠ .env contains placeholder API key") + print(" Please update with your actual OpenAI API key") + else: + print("✓ .env contains API key configuration") + else: + print("✗ .env file not found") + if env_example_path.exists(): + print(" ℹ .env.example exists - copy it to .env:") + print(" cp .env.example .env") + print(" Then edit .env with your API key") + else: + print(" ✗ .env.example also missing") + + +def main(): + """Run all configuration tests""" + print("\n🔧 GPTEdit Configuration Tester\n") + + # Test environment file + test_env_file() + + # Test configuration + if test_configuration(): + print("\n✅ All configuration tests passed!") + print("\nYou can now:") + print("1. Start the server: python main.py") + print("2. Or configure Claude Desktop with the settings shown above") + return 0 + else: + print("\n❌ Configuration tests failed") + print("\nPlease:") + print("1. Create a .env file from .env.example") + print("2. Add your OpenAI API key") + print("3. 
Check directory permissions") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_directory_creation.py b/tests/test_directory_creation.py new file mode 100644 index 0000000..58f5633 --- /dev/null +++ b/tests/test_directory_creation.py @@ -0,0 +1,188 @@ +"""Test directory creation and error handling improvements""" + +import tempfile +import shutil +import os +from pathlib import Path +import pytest + +from src.connector import Config + + +class TestDirectoryCreation: + """Test cases for improved directory creation functionality""" + + def setup_method(self): + """Set up test environment""" + # Create temporary directory for testing + self.temp_base = Path(tempfile.mkdtemp(prefix="gptedit_test_")) + + # Override environment variables for testing + os.environ['GENERATED_IMAGES_PATH'] = str(self.temp_base / 'generated_test') + os.environ['TEMP_PATH'] = str(self.temp_base / 'temp_test') + os.environ['OPENAI_API_KEY'] = 'sk-test-key-for-testing' # Required for validation + + def teardown_method(self): + """Clean up test environment""" + # Clean up temporary directory + if self.temp_base.exists(): + shutil.rmtree(self.temp_base) + + # Clean up environment variables + for key in ['GENERATED_IMAGES_PATH', 'TEMP_PATH', 'OPENAI_API_KEY']: + if key in os.environ: + del os.environ[key] + + def test_basic_directory_creation(self): + """Test that directories are created successfully""" + config = Config() + + # Check that both directories exist + assert config.generated_images_path.exists() + assert config.generated_images_path.is_dir() + assert config.temp_path.exists() + assert config.temp_path.is_dir() + + def test_nested_directory_creation(self): + """Test creation of nested directories""" + # Set nested paths + nested_generated = self.temp_base / 'deeply' / 'nested' / 'generated' + nested_temp = self.temp_base / 'deeply' / 'nested' / 'temp' + + os.environ['GENERATED_IMAGES_PATH'] = str(nested_generated) + os.environ['TEMP_PATH'] = 
str(nested_temp) + + config = Config() + + # Check that nested directories were created + assert config.generated_images_path.exists() + assert config.temp_path.exists() + assert nested_generated.exists() + assert nested_temp.exists() + + def test_ensure_temp_directory_runtime(self): + """Test runtime temp directory recreation""" + config = Config() + + # Delete temp directory + shutil.rmtree(config.temp_path) + assert not config.temp_path.exists() + + # Ensure temp directory should recreate it + config.ensure_temp_directory() + assert config.temp_path.exists() + assert config.temp_path.is_dir() + + def test_ensure_output_directory_runtime(self): + """Test runtime output directory recreation""" + config = Config() + + # Delete output directory + shutil.rmtree(config.generated_images_path) + assert not config.generated_images_path.exists() + + # Ensure output directory should recreate it + config.ensure_output_directory() + assert config.generated_images_path.exists() + assert config.generated_images_path.is_dir() + + def test_directory_permissions(self): + """Test write permissions in created directories""" + config = Config() + + # Test temp directory write permission + test_temp_file = config.temp_path / 'test_write.txt' + test_temp_file.write_text('test') + assert test_temp_file.exists() + test_temp_file.unlink() + + # Test generated_images directory write permission + test_output_file = config.generated_images_path / 'test_write.txt' + test_output_file.write_text('test') + assert test_output_file.exists() + test_output_file.unlink() + + def test_get_output_path_ensures_directory(self): + """Test that get_output_path creates directory if missing""" + config = Config() + + # Delete output directory + shutil.rmtree(config.generated_images_path) + assert not config.generated_images_path.exists() + + # get_output_path should recreate the directory + output_path = config.get_output_path("test_base", 1, "png") + assert config.generated_images_path.exists() + assert 
output_path.parent == config.generated_images_path + + def test_invalid_directory_path_handling(self): + """Test handling of invalid directory paths""" + # Try to create directories in a location that doesn't exist and can't be created + # This test might need to be adapted based on OS permissions + + # Set an invalid path (on most systems, you can't create directories in root without permissions) + if os.name == 'nt': # Windows + invalid_path = 'C:\\invalid_system_path\\gptedit_test' + else: # Unix-like + invalid_path = '/root/invalid_system_path/gptedit_test' + + os.environ['GENERATED_IMAGES_PATH'] = invalid_path + + # This should raise an exception during initialization + with pytest.raises(RuntimeError): + Config() + + def test_directory_already_exists(self): + """Test that existing directories are handled correctly""" + # Create directories manually first + generated_path = self.temp_base / 'pre_existing_generated' + temp_path = self.temp_base / 'pre_existing_temp' + + generated_path.mkdir(parents=True) + temp_path.mkdir(parents=True) + + # Add some files to verify they're preserved + (generated_path / 'existing_file.txt').write_text('preserved') + (temp_path / 'existing_temp.txt').write_text('preserved') + + # Set environment to use existing directories + os.environ['GENERATED_IMAGES_PATH'] = str(generated_path) + os.environ['TEMP_PATH'] = str(temp_path) + + config = Config() + + # Verify directories still exist and files are preserved + assert config.generated_images_path.exists() + assert config.temp_path.exists() + assert (generated_path / 'existing_file.txt').exists() + assert (temp_path / 'existing_temp.txt').exists() + + +if __name__ == "__main__": + # Run tests + import sys + import subprocess + + # Run with pytest if available + try: + subprocess.run([sys.executable, '-m', 'pytest', __file__, '-v'], check=True) + except (subprocess.CalledProcessError, FileNotFoundError): + # Fallback to basic test runner + print("Running basic tests...") + 
test_instance = TestDirectoryCreation() + + test_methods = [method for method in dir(test_instance) if method.startswith('test_')] + + for method_name in test_methods: + print(f"Running {method_name}...") + try: + test_instance.setup_method() + method = getattr(test_instance, method_name) + method() + test_instance.teardown_method() + print(f"✅ {method_name} passed") + except Exception as e: + print(f"❌ {method_name} failed: {e}") + test_instance.teardown_method() + + print("Basic tests completed!") diff --git a/tests/test_image_utils.py b/tests/test_image_utils.py new file mode 100644 index 0000000..abf0144 --- /dev/null +++ b/tests/test_image_utils.py @@ -0,0 +1,228 @@ +"""Unit tests for image utilities""" + +import pytest +import tempfile +import os +from pathlib import Path +from PIL import Image +import io + +from src.utils.image_utils import ( + validate_image_file, + convert_to_png, + get_image_dimensions, + get_image_dimensions_from_bytes, + ensure_transparent_background, + ensure_opaque_background, + save_image, + create_mask_from_alpha, + encode_image_base64, + decode_image_base64 +) + + +@pytest.fixture +def temp_image_file(): + """Create a temporary test image""" + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f: + # Create a simple test image + img = Image.new('RGBA', (100, 100), color=(255, 0, 0, 128)) + img.save(f.name, 'PNG') + yield f.name + # Cleanup + if os.path.exists(f.name): + os.unlink(f.name) + + +@pytest.fixture +def large_image_file(): + """Create a large test image""" + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f: + # Create a large test image (5000x5000 would be too large) + img = Image.new('RGBA', (5000, 5000), color=(0, 255, 0, 255)) + img.save(f.name, 'PNG') + yield f.name + # Cleanup + if os.path.exists(f.name): + os.unlink(f.name) + + +def test_validate_image_file_valid(temp_image_file): + """Test validation of a valid image file""" + is_valid, size_mb, error_msg = 
validate_image_file(temp_image_file) + + assert is_valid is True + assert size_mb > 0 + assert error_msg is None + + +def test_validate_image_file_not_exists(): + """Test validation of non-existent file""" + is_valid, size_mb, error_msg = validate_image_file("nonexistent.png") + + assert is_valid is False + assert size_mb == 0 + assert "not found" in error_msg.lower() + + +def test_validate_image_file_too_large(large_image_file): + """Test validation of oversized image""" + is_valid, size_mb, error_msg = validate_image_file(large_image_file, max_size_mb=0.001) + + assert is_valid is False + assert size_mb > 0.001 + assert "too large" in error_msg.lower() + + +def test_validate_image_dimensions_too_large(): + """Test validation of image with dimensions exceeding limits""" + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f: + img = Image.new('RGBA', (5000, 5000), color=(255, 255, 255, 255)) + img.save(f.name, 'PNG') + + try: + is_valid, size_mb, error_msg = validate_image_file(f.name) + assert is_valid is False + assert "too large" in error_msg.lower() + assert "5000x5000" in error_msg + finally: + os.unlink(f.name) + + +def test_convert_to_png_from_jpeg(): + """Test converting JPEG to PNG""" + with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as f: + # Create a JPEG image + img = Image.new('RGB', (50, 50), color=(255, 0, 0)) + img.save(f.name, 'JPEG') + + try: + png_data = convert_to_png(f.name) + + # Verify it's valid PNG data + img_converted = Image.open(io.BytesIO(png_data)) + assert img_converted.format == 'PNG' + assert img_converted.mode == 'RGBA' + assert img_converted.size == (50, 50) + finally: + os.unlink(f.name) + + +def test_get_image_dimensions(temp_image_file): + """Test getting image dimensions""" + width, height = get_image_dimensions(temp_image_file) + + assert width == 100 + assert height == 100 + + +def test_get_image_dimensions_from_bytes(): + """Test getting dimensions from image bytes""" + img = Image.new('RGBA', 
(200, 150), color=(0, 0, 255, 255)) + buffer = io.BytesIO() + img.save(buffer, format='PNG') + image_data = buffer.getvalue() + + width, height = get_image_dimensions_from_bytes(image_data) + + assert width == 200 + assert height == 150 + + +def test_ensure_transparent_background(): + """Test ensuring transparent background""" + # Create image with opaque background + img = Image.new('RGB', (50, 50), color=(255, 255, 255)) + buffer = io.BytesIO() + img.save(buffer, format='PNG') + image_data = buffer.getvalue() + + # Process + result_data = ensure_transparent_background(image_data) + + # Verify result has alpha channel + result_img = Image.open(io.BytesIO(result_data)) + assert result_img.mode == 'RGBA' + + +def test_ensure_opaque_background(): + """Test ensuring opaque background""" + # Create image with transparent areas + img = Image.new('RGBA', (50, 50), color=(255, 0, 0, 128)) + buffer = io.BytesIO() + img.save(buffer, format='PNG') + image_data = buffer.getvalue() + + # Process + result_data = ensure_opaque_background(image_data) + + # Verify result is opaque + result_img = Image.open(io.BytesIO(result_data)) + assert result_img.mode == 'RGB' + + +def test_save_image(): + """Test saving image to file""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create test image data + img = Image.new('RGBA', (50, 50), color=(0, 255, 0, 255)) + buffer = io.BytesIO() + img.save(buffer, format='PNG') + image_data = buffer.getvalue() + + # Save + output_path = os.path.join(tmpdir, 'test_output.png') + success = save_image(image_data, output_path) + + assert success is True + assert os.path.exists(output_path) + + # Verify saved image + saved_img = Image.open(output_path) + assert saved_img.size == (50, 50) + + +def test_create_mask_from_alpha(): + """Test creating mask from alpha channel""" + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f: + # Create image with alpha channel + img = Image.new('RGBA', (50, 50)) + # Make half transparent, half opaque 
+        for x in range(25):
+            for y in range(50):
+                img.putpixel((x, y), (255, 0, 0, 0))  # Transparent
+        for x in range(25, 50):
+            for y in range(50):
+                img.putpixel((x, y), (0, 255, 0, 255))  # Opaque
+
+        img.save(f.name, 'PNG')
+
+    try:
+        mask_data = create_mask_from_alpha(f.name)
+
+        assert mask_data is not None
+
+        # Verify mask
+        mask_img = Image.open(io.BytesIO(mask_data))
+        assert mask_img.mode == 'L'  # Grayscale
+        assert mask_img.size == (50, 50)
+    finally:
+        os.unlink(f.name)
+
+
+def test_base64_encoding_decoding():
+    """Test base64 encoding and decoding"""
+    # Create test data
+    original_data = b"Test image data bytes"
+
+    # Encode
+    encoded = encode_image_base64(original_data)
+    assert isinstance(encoded, str)
+
+    # Decode
+    decoded = decode_image_base64(encoded)
+    assert decoded == original_data
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_main.py b/tests/test_main.py
new file mode 100644
index 0000000..3e0f1c3
--- /dev/null
+++ b/tests/test_main.py
@@ -0,0 +1,233 @@
+"""
+Simple test runner for GPTEdit
+This script runs basic tests without requiring pytest
+"""
+
+import sys
+import os
+from pathlib import Path
+
+# Add project root to path
+# BUGFIX: this file lives in tests/, so the repo root is two levels up.
+# `.parent` alone put tests/ on sys.path and `import src.*` could not resolve.
+project_root = Path(__file__).parent.parent  # tests/ -> repo root
+sys.path.insert(0, str(project_root))
+
+def test_imports():
+    """Test that all modules can be imported"""
+    print("Testing imports...")
+
+    try:
+        # Test config import
+        from src.connector.config import Config
+        print("✓ Config module imported")
+
+        # Test validation utilities
+        from src.utils.validation import validate_edit_parameters, sanitize_prompt
+        print("✓ Validation utilities imported")
+
+        # Test image utilities (without PIL dependency for now)
+        try:
+            from src.utils.image_utils import encode_image_base64, decode_image_base64
+            print("✓ Image utilities imported")
+        except ImportError as e:
+            print(f"⚠ Image utilities require PIL: {e}")
+
+        # Test server models
+        from src.server.models import MCPToolDefinitions
+        print("✓ Server 
models imported") + + # Test MCP server + try: + from src.server.mcp_server import GPTEditMCPServer + print("✓ MCP server imported") + except ImportError as e: + print(f"⚠ MCP server requires mcp package: {e}") + + return True + + except Exception as e: + print(f"✗ Import error: {e}") + return False + + +def test_config(): + """Test configuration functionality""" + print("\nTesting configuration...") + + try: + from src.connector.config import Config + + # Create config instance + config = Config() + print("✓ Config instance created") + + # Test fixed parameters + assert config.MODEL == "gpt-image-1" + assert config.QUALITY == "high" + assert config.NUMBER_OF_IMAGES == 1 + print("✓ Fixed parameters correct") + + # Test size calculation + assert config.get_optimal_size(100, 100) == "256x256" + assert config.get_optimal_size(500, 500) == "512x512" + assert config.get_optimal_size(1000, 1000) == "1024x1024" + print("✓ Size calculation working") + + # Test max size conversion + config.max_image_size_mb = 4 + assert config.get_max_image_size_bytes() == 4 * 1024 * 1024 + print("✓ Size conversion working") + + return True + + except Exception as e: + print(f"✗ Config test error: {e}") + return False + + +def test_validation(): + """Test validation utilities""" + print("\nTesting validation utilities...") + + try: + from src.utils.validation import sanitize_prompt, validate_edit_parameters + + # Test prompt sanitization + prompt = " Test prompt " + sanitized = sanitize_prompt(prompt) + assert sanitized == "Test prompt" + print("✓ Prompt sanitization working") + + # Test long prompt truncation + long_prompt = "x" * 2000 + sanitized_long = sanitize_prompt(long_prompt) + assert len(sanitized_long) == 1000 + print("✓ Prompt truncation working") + + # Test parameter validation (without file check) + params = { + 'image_path': 'test.png', # Won't exist but we're testing structure + 'prompt': 'Edit this image' + } + is_valid, error_msg = validate_edit_parameters(params) + # 
Should fail because file doesn't exist, but that's expected + assert is_valid is False + assert "not found" in error_msg + print("✓ Parameter validation structure working") + + return True + + except Exception as e: + print(f"✗ Validation test error: {e}") + return False + + +def test_base64_encoding(): + """Test base64 encoding/decoding""" + print("\nTesting base64 encoding...") + + try: + from src.utils.image_utils import encode_image_base64, decode_image_base64 + + # Test data + test_data = b"Test image data" + + # Encode + encoded = encode_image_base64(test_data) + assert isinstance(encoded, str) + print("✓ Base64 encoding working") + + # Decode + decoded = decode_image_base64(encoded) + assert decoded == test_data + print("✓ Base64 decoding working") + + return True + + except ImportError: + print("⚠ Skipping base64 tests (PIL not installed)") + return True + except Exception as e: + print(f"✗ Base64 test error: {e}") + return False + + +def test_tool_definitions(): + """Test MCP tool definitions""" + print("\nTesting tool definitions...") + + try: + from src.server.models import MCPToolDefinitions + + # Get all tools + tools = MCPToolDefinitions.get_all_tools() + assert len(tools) == 5 + print(f"✓ Found {len(tools)} tools") + + # Check tool names + tool_names = [tool.name for tool in tools] + expected_names = [ + "edit_image", + "edit_with_mask", + "batch_edit", + "validate_image", + "create_mask_from_alpha" + ] + + for name in expected_names: + assert name in tool_names + print(f"✓ All expected tools defined: {', '.join(tool_names)}") + + # Check edit_image tool structure + edit_tool = MCPToolDefinitions.get_edit_image_tool() + assert edit_tool.name == "edit_image" + assert "image_path" in edit_tool.inputSchema["properties"] + assert "prompt" in edit_tool.inputSchema["properties"] + print("✓ Tool schema structure correct") + + return True + + except ImportError as e: + print(f"⚠ Tool definitions require mcp package: {e}") + return True + except Exception as 
e:
+        print(f"✗ Tool definitions test error: {e}")
+        return False
+
+
+def main():
+    """Run all tests"""
+    print("=" * 60)
+    print("GPTEdit Test Suite")
+    print("=" * 60)
+
+    tests = [
+        test_imports,
+        test_config,
+        test_validation,
+        test_base64_encoding,
+        test_tool_definitions
+    ]
+
+    passed = 0
+    failed = 0
+
+    for test in tests:
+        if test():
+            passed += 1
+        else:
+            failed += 1
+
+    print("\n" + "=" * 60)
+    print(f"Test Results: {passed} passed, {failed} failed")
+    print("=" * 60)
+
+    if failed == 0:
+        print("✅ All tests passed!")
+        return 0
+    else:
+        print(f"❌ {failed} test(s) failed")
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/test_server.py b/tests/test_server.py
new file mode 100644
index 0000000..0d44ac6
--- /dev/null
+++ b/tests/test_server.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""Test MCP server JSON-RPC communication"""
+
+import json
+import asyncio
+import sys
+from pathlib import Path
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent.parent))  # BUGFIX: tests/ -> repo root, else `import src.*` fails
+
+from src.connector.config import Config
+from src.server.mcp_server import GPTEditMCPServer
+
+
+async def test_server():
+    """Test server methods"""
+    config = Config()
+    if not config.validate():
+        print("Configuration validation failed")
+        return
+
+    mcp_server = GPTEditMCPServer(config)
+    server = mcp_server.get_server()
+
+    print("Testing MCP Server methods...")
+    print("=" * 50)
+
+    # Test list_tools
+    print("\n1. Testing list_tools:")
+    tools = await server.list_tools()
+    print(f"   Found {len(tools)} tools")
+    for tool in tools:
+        print(f"   - {tool.name}: {tool.description[:50]}...")
+
+    # Test list_prompts
+    print("\n2. Testing list_prompts:")
+    try:
+        prompts = await server.list_prompts()
+        print(f"   Found {len(prompts)} prompts")
+        for prompt in prompts:
+            print(f"   - {prompt.name}: {prompt.description}")
+    except Exception as e:
+        print(f"   Error: {e}")
+
+    # Test list_resources
+    print("\n3. Testing list_resources:")
+    try:
+        resources = await server.list_resources()
+        print(f"   Found {len(resources)} resources")
+    except Exception as e:
+        print(f"   Error: {e}")
+
+    print("\n" + "=" * 50)
+    print("Test complete!")
+
+
+if __name__ == "__main__":
+    asyncio.run(test_server())
diff --git a/tests/test_syntax.py b/tests/test_syntax.py
new file mode 100644
index 0000000..57798f2
--- /dev/null
+++ b/tests/test_syntax.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""Test script to check for syntax errors"""
+
+import sys
+import py_compile
+from pathlib import Path
+
+def check_syntax(file_path):
+    """Check Python file for syntax errors"""
+    try:
+        py_compile.compile(str(file_path), doraise=True)
+        print(f"✓ {file_path.name}: No syntax errors")
+        return True
+    except py_compile.PyCompileError as e:
+        print(f"✗ {file_path.name}: Syntax error!")
+        print(f"  Line {e.exc_value.lineno}: {e.exc_value.msg}")
+        print(f"  {e.exc_value.text}")
+        return False
+
+def main():
+    """Check all Python files for syntax errors"""
+    project_root = Path(__file__).parent.parent  # BUGFIX: tests/ -> repo root, else every file reports "not found"
+    src_dir = project_root / "src"
+
+    python_files = [
+        src_dir / "connector" / "openai_client.py",
+        src_dir / "connector" / "config.py",
+        src_dir / "connector" / "__init__.py",
+        src_dir / "server" / "mcp_server.py",
+        src_dir / "server" / "handlers.py",
+        src_dir / "server" / "models.py",
+        src_dir / "utils" / "image_utils.py",
+        src_dir / "utils" / "token_utils.py",
+        project_root / "main.py"
+    ]
+
+    print("Checking Python files for syntax errors...")
+    print("=" * 50)
+
+    errors_found = False
+    for file_path in python_files:
+        if file_path.exists():
+            if not check_syntax(file_path):
+                errors_found = True
+        else:
+            print(f"⚠ {file_path.name}: File not found")
+
+    print("=" * 50)
+    if errors_found:
+        print("❌ Syntax errors found!")
+        return 1
+    else:
+        print("✅ All files passed syntax check!")
+        return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/test_token_utils.py 
b/tests/test_token_utils.py new file mode 100644 index 0000000..eb858d9 --- /dev/null +++ b/tests/test_token_utils.py @@ -0,0 +1,205 @@ +"""Unit tests for token utilities""" + +import pytest +from src.utils.token_utils import ( + estimate_token_count, + get_token_limit_for_size, + determine_optimal_size_for_aspect_ratio, + validate_prompt_length, + get_prompt_stats, + truncate_prompt_to_fit, + suggest_quality_for_prompt, + TOKEN_LIMITS +) + + +def test_estimate_token_count(): + """Test token count estimation""" + # Empty string + assert estimate_token_count("") == 0 + + # Short text + count = estimate_token_count("Hello world") + assert 2 <= count <= 4 # Reasonable range + + # Longer text (approximately 4 chars per token) + long_text = "a" * 100 + count = estimate_token_count(long_text) + assert 20 <= count <= 30 + + # With spaces and punctuation + text = "This is a test. With multiple sentences!" + count = estimate_token_count(text) + assert 8 <= count <= 12 + + +def test_get_token_limit_for_size(): + """Test getting token limits for different sizes""" + # High quality limits + assert get_token_limit_for_size("1024x1024", "high") == 4160 + assert get_token_limit_for_size("1024x1536", "high") == 6240 + assert get_token_limit_for_size("1536x1024", "high") == 6208 + + # Medium quality limits + assert get_token_limit_for_size("1024x1024", "medium") == 1056 + assert get_token_limit_for_size("1024x1536", "medium") == 1584 + + # Low quality limits + assert get_token_limit_for_size("1024x1024", "low") == 272 + + # Unknown size should default to square + assert get_token_limit_for_size("999x999", "high") == 4160 + + # Invalid quality should default to high + assert get_token_limit_for_size("1024x1024", "invalid") == 4160 + + +def test_determine_optimal_size_for_aspect_ratio(): + """Test optimal size determination based on aspect ratio""" + # Small square image + size, aspect = determine_optimal_size_for_aspect_ratio(100, 100) + assert size == "256x256" + assert aspect == 
"square" + + # Medium square image + size, aspect = determine_optimal_size_for_aspect_ratio(400, 400) + assert size == "512x512" + assert aspect == "square" + + # Large square image + size, aspect = determine_optimal_size_for_aspect_ratio(1000, 1000) + assert size == "1024x1024" + assert aspect == "square" + + # Landscape image + size, aspect = determine_optimal_size_for_aspect_ratio(1600, 900) + assert size == "1536x1024" + assert aspect == "landscape" + + # Portrait image + size, aspect = determine_optimal_size_for_aspect_ratio(900, 1600) + assert size == "1024x1536" + assert aspect == "portrait" + + +def test_validate_prompt_length(): + """Test prompt length validation""" + # Short prompt - should be valid + is_valid, tokens, error = validate_prompt_length("Edit this image", "1024x1024", "high") + assert is_valid is True + assert tokens > 0 + assert error == "" + + # Very long prompt - should be invalid + long_prompt = "word " * 2000 # Way over limit + is_valid, tokens, error = validate_prompt_length(long_prompt, "1024x1024", "high") + assert is_valid is False + assert tokens > 4160 # Should exceed high quality limit + assert "too long" in error.lower() + + # Edge case - close to limit (should pass but might warn) + # For 1024x1024 high quality, limit is 4160 tokens + # Approximately 16,640 characters (4 chars per token) + edge_prompt = "a" * 16000 + is_valid, tokens, error = validate_prompt_length(edge_prompt, "1024x1024", "high") + # Should be close to limit + assert tokens > 3000 + + +def test_get_prompt_stats(): + """Test getting prompt statistics""" + prompt = "Make the sky blue and add some clouds" + stats = get_prompt_stats(prompt, "1024x1024", "high") + + assert "estimated_tokens" in stats + assert "token_limit" in stats + assert "usage_percentage" in stats + assert "remaining_tokens" in stats + assert "quality" in stats + assert "size" in stats + assert "is_valid" in stats + + assert stats["token_limit"] == 4160 + assert stats["quality"] == "high" + 
assert stats["size"] == "1024x1024" + assert stats["is_valid"] is True + assert stats["usage_percentage"] < 10 # Short prompt + + +def test_truncate_prompt_to_fit(): + """Test prompt truncation""" + # Short prompt - should not be truncated + short_prompt = "Edit this image" + truncated = truncate_prompt_to_fit(short_prompt, "1024x1024", "high") + assert truncated == short_prompt + + # Long prompt - should be truncated + long_prompt = " ".join([f"word{i}" for i in range(5000)]) + truncated = truncate_prompt_to_fit(long_prompt, "1024x1024", "high", buffer=0.95) + + # Check that truncated version is shorter + assert len(truncated) < len(long_prompt) + + # Check that truncated version fits within limits + is_valid, tokens, _ = validate_prompt_length(truncated, "1024x1024", "high") + assert is_valid is True + assert tokens < 4160 * 0.95 # Should be within buffer + + +def test_truncate_prompt_with_low_quality(): + """Test prompt truncation with low quality (strict limits)""" + # For low quality square, limit is only 272 tokens + medium_prompt = " ".join([f"word{i}" for i in range(200)]) + truncated = truncate_prompt_to_fit(medium_prompt, "1024x1024", "low") + + # Should be significantly truncated + assert len(truncated) < len(medium_prompt) + + # Verify it fits + is_valid, tokens, _ = validate_prompt_length(truncated, "1024x1024", "low") + assert is_valid is True + assert tokens <= 272 * 0.95 + + +def test_suggest_quality_for_prompt(): + """Test quality suggestion based on prompt length""" + # Very short prompt - should suggest low + short_prompt = "blue sky" + suggested = suggest_quality_for_prompt(short_prompt, "1024x1024") + assert suggested == "low" + + # Medium prompt - should suggest medium + medium_prompt = " ".join([f"word{i}" for i in range(100)]) + suggested = suggest_quality_for_prompt(medium_prompt, "1024x1024") + assert suggested in ["low", "medium"] + + # Long prompt - should suggest high + long_prompt = " ".join([f"word{i}" for i in range(1000)]) + 
suggested = suggest_quality_for_prompt(long_prompt, "1024x1024") + assert suggested == "high" + + # Very long prompt - still suggests high (will need truncation) + very_long_prompt = " ".join([f"word{i}" for i in range(5000)]) + suggested = suggest_quality_for_prompt(very_long_prompt, "1024x1024") + assert suggested == "high" + + +def test_token_limits_structure(): + """Test that TOKEN_LIMITS has the expected structure""" + assert "low" in TOKEN_LIMITS + assert "medium" in TOKEN_LIMITS + assert "high" in TOKEN_LIMITS + + for quality in TOKEN_LIMITS: + assert "1024x1024" in TOKEN_LIMITS[quality] + assert "1024x1536" in TOKEN_LIMITS[quality] + assert "1536x1024" in TOKEN_LIMITS[quality] + + # Verify high quality limits match documentation + assert TOKEN_LIMITS["high"]["1024x1024"] == 4160 + assert TOKEN_LIMITS["high"]["1024x1536"] == 6240 + assert TOKEN_LIMITS["high"]["1536x1024"] == 6208 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_validation.py b/tests/test_validation.py new file mode 100644 index 0000000..05f7fe1 --- /dev/null +++ b/tests/test_validation.py @@ -0,0 +1,197 @@ +"""Unit tests for validation utilities""" + +import pytest +from src.utils.validation import ( + validate_edit_parameters, + validate_batch_parameters, + sanitize_prompt, + validate_api_response +) +import tempfile +from PIL import Image + + +def create_temp_image(): + """Helper to create a temporary image file""" + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f: + img = Image.new('RGBA', (100, 100), color=(255, 0, 0, 255)) + img.save(f.name, 'PNG') + return f.name + + +def test_validate_edit_parameters_valid(): + """Test validation of valid edit parameters""" + temp_image = create_temp_image() + + params = { + 'image_path': temp_image, + 'prompt': 'Make it blue' + } + + is_valid, error_msg = validate_edit_parameters(params) + assert is_valid is True + assert error_msg is None + + +def 
test_validate_edit_parameters_missing_required(): + """Test validation with missing required fields""" + params = { + 'prompt': 'Make it blue' + } + + is_valid, error_msg = validate_edit_parameters(params) + assert is_valid is False + assert 'image_path' in error_msg + + +def test_validate_edit_parameters_invalid_image_path(): + """Test validation with non-existent image""" + params = { + 'image_path': '/nonexistent/image.png', + 'prompt': 'Make it blue' + } + + is_valid, error_msg = validate_edit_parameters(params) + assert is_valid is False + assert 'not found' in error_msg + + +def test_validate_edit_parameters_invalid_background(): + """Test validation with invalid background option""" + temp_image = create_temp_image() + + params = { + 'image_path': temp_image, + 'prompt': 'Make it blue', + 'background': 'invalid_option' + } + + is_valid, error_msg = validate_edit_parameters(params) + assert is_valid is False + assert 'background' in error_msg + + +def test_validate_edit_parameters_invalid_size(): + """Test validation with invalid size""" + temp_image = create_temp_image() + + params = { + 'image_path': temp_image, + 'prompt': 'Make it blue', + 'size': '2048x2048' # Not supported + } + + is_valid, error_msg = validate_edit_parameters(params) + assert is_valid is False + assert 'size' in error_msg + + +def test_validate_batch_parameters_valid(): + """Test validation of valid batch parameters""" + temp_image1 = create_temp_image() + temp_image2 = create_temp_image() + + batch_params = [ + {'image_path': temp_image1, 'prompt': 'Make it blue'}, + {'image_path': temp_image2, 'prompt': 'Make it green'} + ] + + is_valid, error_msg = validate_batch_parameters(batch_params) + assert is_valid is True + assert error_msg is None + + +def test_validate_batch_parameters_empty(): + """Test validation with empty batch""" + batch_params = [] + + is_valid, error_msg = validate_batch_parameters(batch_params) + assert is_valid is False + assert 'No images' in error_msg + + +def 
test_validate_batch_parameters_too_many(): + """Test validation with too many images""" + temp_image = create_temp_image() + + # Create 17 items (max is 16) + batch_params = [ + {'image_path': temp_image, 'prompt': f'Edit {i}'} + for i in range(17) + ] + + is_valid, error_msg = validate_batch_parameters(batch_params) + assert is_valid is False + assert 'Too many' in error_msg + + +def test_validate_batch_parameters_invalid_item(): + """Test validation with invalid item in batch""" + temp_image = create_temp_image() + + batch_params = [ + {'image_path': temp_image, 'prompt': 'Valid'}, + {'image_path': '/invalid/path.png', 'prompt': 'Invalid'} + ] + + is_valid, error_msg = validate_batch_parameters(batch_params) + assert is_valid is False + assert 'Item 2' in error_msg + + +def test_sanitize_prompt_whitespace(): + """Test prompt sanitization for whitespace""" + prompt = " Make it blue " + sanitized = sanitize_prompt(prompt) + + assert sanitized == "Make it blue" + + +def test_sanitize_prompt_truncation(): + """Test prompt truncation for long prompts""" + prompt = "x" * 2000 # Very long prompt + sanitized = sanitize_prompt(prompt) + + assert len(sanitized) == 1000 + + +def test_validate_api_response_valid(): + """Test validation of valid API response""" + # Mock a valid response + class MockResponse: + def __init__(self): + self.data = [MockData()] + + class MockData: + def __init__(self): + self.b64_json = "base64encodeddata" + + response = MockResponse() + assert validate_api_response(response) is True + + +def test_validate_api_response_invalid(): + """Test validation of invalid API responses""" + # None response + assert validate_api_response(None) is False + + # Response without data attribute + class BadResponse1: + pass + assert validate_api_response(BadResponse1()) is False + + # Response with empty data + class BadResponse2: + def __init__(self): + self.data = [] + assert validate_api_response(BadResponse2()) is False + + # Response without b64_json + class 
BadResponse3: + def __init__(self): + self.data = [object()] + assert validate_api_response(BadResponse3()) is False + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_webp_optimization.py b/tests/test_webp_optimization.py new file mode 100644 index 0000000..a04e83e --- /dev/null +++ b/tests/test_webp_optimization.py @@ -0,0 +1,205 @@ +""" +Test script for WebP auto-optimization feature +""" + +import sys +import os +from pathlib import Path +import tempfile + +# Add project root to path +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +def test_webp_optimization(): + """Test WebP optimization functionality""" + print("\n" + "="*60) + print("Testing WebP Auto-Optimization Feature") + print("="*60) + + try: + from src.utils.image_utils import ( + optimize_image_to_size_limit, + convert_to_png_with_size_limit, + get_file_size_mb + ) + print("✓ Image utils imported successfully") + + # Create a test image + from PIL import Image + import io + + # Create a large test image (simulate >4MB) + print("\nCreating test image...") + img = Image.new('RGBA', (2048, 2048), color=(255, 0, 0, 255)) + + # Save to temp file + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp: + img.save(tmp.name, 'PNG') + temp_path = tmp.name + + print(f"✓ Test image created: {temp_path}") + original_size = get_file_size_mb(temp_path) + print(f" Original size: {original_size:.2f}MB") + + # Test WebP conversion + print("\nTesting WebP conversion...") + optimized_data, format_used = convert_to_png_with_size_limit( + temp_path, + max_size_mb=1.0, # Force optimization by setting low limit + prefer_webp=True + ) + + optimized_size = len(optimized_data) / (1024 * 1024) + print(f"✓ Image optimized successfully") + print(f" Format used: {format_used}") + print(f" Optimized size: {optimized_size:.2f}MB") + print(f" Size reduction: {((original_size - optimized_size) / original_size * 100):.1f}%") + + # Clean up + 
os.unlink(temp_path) + print("\n✓ Test completed successfully!") + + return True + + except ImportError as e: + print(f"✗ Import error: {e}") + print(" Make sure Pillow is installed: pip install pillow") + return False + except Exception as e: + print(f"✗ Test failed: {e}") + import traceback + traceback.print_exc() + return False + + +def test_optimization_with_client(): + """Test optimization with OpenAI client""" + print("\n" + "="*60) + print("Testing OpenAI Client with Auto-Optimization") + print("="*60) + + try: + from src.connector.config import Config + from src.connector.openai_client import ImageEditRequest + + print("✓ Client modules imported") + + # Create test config + config = Config() + print("✓ Config created") + + # Create test request + request = ImageEditRequest( + image_path="test_large_image.png", # This would be a real file in production + prompt="Make the image more colorful", + background="transparent", + auto_optimize=True # Enable auto-optimization + ) + + print("✓ Request created with auto_optimize=True") + print("\nIn production, this would:") + print(" 1. Check if image > 4MB") + print(" 2. Automatically convert to WebP if needed") + print(" 3. Find optimal quality setting") + print(" 4. Send optimized image to OpenAI API") + print(" 5. 
Log optimization details in response") + + return True + + except Exception as e: + print(f"✗ Test failed: {e}") + return False + + +def test_quality_levels(): + """Test different quality levels for optimization""" + print("\n" + "="*60) + print("Testing Quality Levels") + print("="*60) + + try: + from src.utils.image_utils import optimize_image_to_size_limit + from PIL import Image + import tempfile + + # Create test image with details + print("Creating detailed test image...") + img = Image.new('RGB', (1024, 1024)) + + # Add some patterns to make compression more interesting + from PIL import ImageDraw + draw = ImageDraw.Draw(img) + for i in range(0, 1024, 20): + draw.line([(0, i), (1024, i)], fill=(i % 255, 100, 200)) + draw.line([(i, 0), (i, 1024)], fill=(200, i % 255, 100)) + + # Save to temp file + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp: + img.save(tmp.name, 'PNG') + temp_path = tmp.name + + print(f"✓ Detailed test image created") + + # Test different size limits + size_limits = [0.5, 1.0, 2.0] + + for limit in size_limits: + print(f"\nTesting with {limit}MB limit...") + try: + optimized = optimize_image_to_size_limit( + temp_path, + max_size_mb=limit, + format='WEBP' + ) + size_mb = len(optimized) / (1024 * 1024) + print(f" ✓ Optimized to {size_mb:.2f}MB (limit: {limit}MB)") + except Exception as e: + print(f" ✗ Failed: {e}") + + # Clean up + os.unlink(temp_path) + + print("\n✓ Quality level tests completed!") + return True + + except Exception as e: + print(f"✗ Test failed: {e}") + return False + + +def main(): + """Run all optimization tests""" + print("="*60) + print("GPTEdit WebP Auto-Optimization Test Suite") + print("="*60) + + tests = [ + test_webp_optimization, + test_optimization_with_client, + test_quality_levels + ] + + passed = 0 + failed = 0 + + for test in tests: + if test(): + passed += 1 + else: + failed += 1 + + print("\n" + "="*60) + print(f"Test Results: {passed} passed, {failed} failed") + print("="*60) + + 
if failed == 0: + print("✅ All optimization tests passed!") + return 0 + else: + print(f"❌ {failed} test(s) failed") + return 1 + + +if __name__ == "__main__": + sys.exit(main())