"""Locate text on screen by sending screenshots to a Umi-OCR HTTP service."""

import base64
import json
import re
import time
import traceback
from typing import Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
import pyautogui
import requests
import win32gui

_Rect = Tuple[int, int, int, int]


class TextFinder:
    """Find the position of text on screen through a Umi-OCR HTTP endpoint.

    Every recognised text entry is represented as a dict with the keys
    ``'text'``, ``'position'`` (a flat ``[x1, y1, x2, y2, ...]`` list built
    from the corner points returned by the service) and ``'confidence'``.
    """

    _DEFAULT_CONFIDENCE = 0.7
    _DEFAULT_TIMEOUT = 30  # seconds
    _SIDE_PENALTY = 1000  # distance added to candidates on the wrong side
    _NEARBY_DISTANCE = 100  # max pixel distance between a time and a coordinate

    # "<CJK chars>时" followed by a parenthesised pair of numbers, e.g.
    # "寅时(123,123)". Half- and full-width parentheses/commas are accepted.
    # When the two numbers have no separator, regex backtracking assigns all
    # but the last digit to the first group.
    _COORD_PATTERN = re.compile(
        r'[\u4e00-\u9fa5]+时[\s\((]+(\d+)(?:[\s,,.]+)?(\d+)[\s\))]+',
        re.UNICODE,
    )
    # The time part alone, e.g. "寅时".
    _TIME_PATTERN = re.compile(r'[\u4e00-\u9fa5]+时', re.UNICODE)
    # A coordinate pair with (groups 1-2) or without (groups 3-4) a time prefix.
    _COORD_ONLY_PATTERN = re.compile(
        r'[\u4e00-\u9fa5]+时[\s\((]+(\d+)(?:[\s,,.]+)?(\d+)[\s\))]+|[\((]+(\d+)(?:[\s,,.]+)?(\d+)[\s\))]+',
        re.UNICODE,
    )

    def __init__(self, umi_url: str = "http://127.0.0.1:1224/api/ocr",
                 config: Optional[dict] = None):
        """Store the service URL and read settings from ``config['text']``.

        Args:
            umi_url: URL of the Umi-OCR HTTP endpoint.
            config: Optional configuration dict. ``config['text']`` may hold
                ``confidence_threshold`` (default 0.7) and
                ``request_timeout`` in seconds (default 30).
        """
        self.umi_url = umi_url
        text_config = config.get('text', {}) if config else {}
        self.confidence_threshold = text_config.get(
            'confidence_threshold', self._DEFAULT_CONFIDENCE)
        self.request_timeout = text_config.get(
            'request_timeout', self._DEFAULT_TIMEOUT)
        print(f"TextFinder initialized with Umi-OCR URL: {self.umi_url}")
        print(f"TextFinder confidence threshold: {self.confidence_threshold}")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _bounding_box(position) -> Optional[Tuple]:
        """Return ``(x, y, width, height)`` enclosing a flat point list.

        Returns None when ``position`` holds fewer than four points.
        """
        if len(position) < 8:
            return None
        xs = position[0::2]
        ys = position[1::2]
        x = min(xs)
        y = min(ys)
        return (x, y, max(xs) - x, max(ys) - y)

    @staticmethod
    def _text_matches(text: str, target_text: str, exact_match: bool) -> bool:
        """Compare by equality when ``exact_match``, else by substring."""
        return text == target_text if exact_match else target_text in text

    def _qualified_rect(self, text_info: Dict, threshold: float,
                        region: Optional[_Rect], label: str) -> Optional[_Rect]:
        """Return the entry's rectangle if it passes the confidence/box checks.

        The rectangle is shifted by the region origin when ``region`` is
        given. ``label`` is only used in the diagnostic messages.
        """
        confidence = text_info.get('confidence', 0)
        if confidence < threshold:
            print(f"Found {label} but confidence is too low: {confidence:.2f} (required: {threshold:.2f})")
            return None
        bbox = self._bounding_box(text_info.get('position', []))
        if bbox is None:
            print(f"Found {label} but position information is insufficient")
            return None
        x, y, width, height = bbox
        if region:
            # OCR coordinates are relative to the captured region.
            x += region[0]
            y += region[1]
        return (x, y, width, height)

    def _collect_matches(self, target_text: str, text_info_list: List[Dict],
                         threshold: float, exact_match: bool,
                         region: Optional[_Rect]) -> List[_Rect]:
        """Return the rectangles of every entry matching ``target_text``."""
        label = f"text '{target_text}'"
        found_texts = []
        for text_info in text_info_list:
            if not self._text_matches(text_info.get('text', ''), target_text, exact_match):
                continue
            rect = self._qualified_rect(text_info, threshold, region, label)
            if rect is not None:
                found_texts.append(rect)
        print(f"Found {len(found_texts)} instances of '{target_text}' at positions: {found_texts}")
        return found_texts

    @staticmethod
    def _clamp_to_base(search_x, search_y, search_w, search_h, base_region) -> Tuple:
        """Keep a search rectangle inside ``base_region``.

        The origin is clamped into the base region and the size is cut so
        that it neither extends past the far edge nor becomes negative.
        """
        x, y, w, h = base_region
        search_x = min(max(search_x, x), x + w)
        search_y = min(max(search_y, y), y + h)
        search_w = max(0, min(search_w, x + w - search_x))
        search_h = max(0, min(search_h, y + h - search_y))
        return (search_x, search_y, search_w, search_h)

    # ------------------------------------------------------------------
    # Recognition
    # ------------------------------------------------------------------

    def recognize_text(self, region: Optional[Tuple[int, int, int, int]] = None) -> Optional[List[Dict]]:
        """Take a screenshot of ``region`` and recognise the text in it.

        Args:
            region: Area to capture as (x, y, width, height); passed
                straight to ``pyautogui.screenshot``.

        Returns:
            The list produced by ``recognize_text_from_image``, or None if
            taking the screenshot or the recognition raised.
        """
        try:
            screenshot = pyautogui.screenshot(region=region)
            return self.recognize_text_from_image(screenshot)
        except Exception as e:
            print(f"Error during text recognition: {e}")
            return None

    def recognize_text_from_image(self, image) -> Optional[List[Dict]]:
        """Send ``image`` to the OCR service and return the recognised text.

        Args:
            image: Image convertible with ``np.array`` (a PIL image in the
                callers visible in this class); treated as RGB.

        Returns:
            A list of ``{'text', 'position', 'confidence'}`` dicts, or None
            when the request fails or the service does not return a list of
            results.
        """
        try:
            # OpenCV encodes in BGR channel order, so swap from RGB first.
            img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            encoded, buf = cv2.imencode('.png', img)
            if not encoded:
                print("Error during text recognition: PNG encoding failed")
                return None

            payload = {
                "base64": base64.b64encode(buf).decode(),
                "options": {"data.format": "json"},
            }
            # A timeout keeps a stalled OCR service from blocking forever.
            # getattr covers instances created without running __init__.
            r = requests.post(
                self.umi_url,
                data=json.dumps(payload),
                headers={"Content-Type": "application/json"},
                timeout=getattr(self, 'request_timeout', self._DEFAULT_TIMEOUT),
            )
            print(f"Response status code: {r.status_code}")

            # Expected: [{"box": [[x1, y1], ...], "score": ..., "text": ...}, ...]
            data = r.json()["data"]
            if not isinstance(data, list):
                # The payload is not a result list (e.g. a message string).
                print(f"Error during text recognition: {data}")
                return None

            text_info_list = [
                {
                    'text': item["text"],
                    # Flatten [[x1, y1], ...] into [x1, y1, x2, y2, ...].
                    'position': [value for point in item["box"] for value in point],
                    'confidence': item["score"],
                }
                for item in data
            ]
            print(f"Recognized text: {[info['text'] for info in text_info_list]}")
            return text_info_list
        except Exception as e:
            print(f"Error during text recognition: {e}")
            return None

    # ------------------------------------------------------------------
    # Lookup
    # ------------------------------------------------------------------

    def screen_ocr_text_pos(self, win_title: str, target_text: str,
                            thresh: float = 0.8) -> Optional[Tuple[int, int, int, int]]:
        """Find ``target_text`` inside the window titled ``win_title``.

        Args:
            win_title: Exact window title passed to ``win32gui.FindWindow``.
            target_text: Substring to look for.
            thresh: Minimum confidence for a match.

        Returns:
            (center_x, center_y, width, height) of the first match, with the
            center offset by the window's top-left corner, or None when the
            window or the text is not found.
        """
        hwnd = win32gui.FindWindow(None, win_title)
        if hwnd == 0:
            return None
        left, top, right, bottom = win32gui.GetWindowRect(hwnd)

        text_info_list = self.recognize_text((left, top, right - left, bottom - top))
        if not text_info_list:
            return None

        for text_info in text_info_list:
            text = text_info.get('text', '')
            confidence = text_info.get('confidence', 0)
            position = text_info.get('position', [])
            if target_text in text and confidence >= thresh and len(position) >= 8:
                # cv2.boundingRect only accepts int32/float32 points; a plain
                # ``int`` dtype is int64 on many platforms.
                points = np.array(
                    list(zip(position[0::2], position[1::2])), dtype=np.int32)
                x, y, w, h = cv2.boundingRect(points)
                return (x + w // 2 + left, y + h // 2 + top, w, h)
        return None

    def find_text(self, target_text: str,
                  region: Optional[Tuple[int, int, int, int]] = None,
                  confidence_threshold: Optional[float] = None,
                  exact_match: bool = False) -> Optional[Tuple[int, int, int, int]]:
        """Find the first occurrence of ``target_text`` on screen.

        Args:
            target_text: Text to look for.
            region: Area to search as (x, y, width, height).
            confidence_threshold: Minimum confidence; the configured value
                is used when None.
            exact_match: Require equality instead of a substring match.

        Returns:
            (x, y, width, height) of the match, or None if not found.
        """
        current_threshold = (confidence_threshold if confidence_threshold is not None
                             else self.confidence_threshold)
        text_info_list = self.recognize_text(region)
        return self._find_text_from_info_list(
            target_text, text_info_list, current_threshold, exact_match, region)

    def find_text_from_image(self, target_text: str, image,
                             confidence_threshold: Optional[float] = None,
                             exact_match: bool = False) -> Optional[Tuple[int, int, int, int]]:
        """Find the first occurrence of ``target_text`` in ``image``.

        Args:
            target_text: Text to look for.
            image: Image handed to ``recognize_text_from_image``.
            confidence_threshold: Minimum confidence; the configured value
                is used when None.
            exact_match: Require equality instead of a substring match.

        Returns:
            (x, y, width, height) in image coordinates, or None if not found.
        """
        current_threshold = (confidence_threshold if confidence_threshold is not None
                             else self.confidence_threshold)
        text_info_list = self.recognize_text_from_image(image)
        return self._find_text_from_info_list(
            target_text, text_info_list, current_threshold, exact_match, None)

    def _find_text_from_info_list(self, target_text: str,
                                  text_info_list: Optional[List[Dict]],
                                  current_threshold: float, exact_match: bool,
                                  region: Optional[Tuple[int, int, int, int]]
                                  ) -> Optional[Tuple[int, int, int, int]]:
        """Return the rectangle of the first qualifying match in the list.

        Args:
            target_text: Text to look for.
            text_info_list: Recognition results; None or empty yields None.
            current_threshold: Minimum confidence.
            exact_match: Require equality instead of a substring match.
            region: When given, its origin is added to the result.

        Returns:
            (x, y, width, height), or None if nothing qualifies.
        """
        if not text_info_list:
            return None

        label = f"text '{target_text}'"
        for text_info in text_info_list:
            if not self._text_matches(text_info.get('text', ''), target_text, exact_match):
                continue
            rect = self._qualified_rect(text_info, current_threshold, region, label)
            if rect is not None:
                x, y, width, height = rect
                print(f"Found text '{target_text}' at position: ({x}, {y}, {width}, {height})")
                return rect
        return None

    def find_texts(self, target_text: str,
                   region: Optional[Tuple[int, int, int, int]] = None,
                   confidence_threshold: Optional[float] = None,
                   exact_match: bool = False) -> List[Tuple[int, int, int, int]]:
        """Find every occurrence of ``target_text`` on screen.

        Args:
            target_text: Text to look for.
            region: Area to search as (x, y, width, height).
            confidence_threshold: Minimum confidence; the configured value
                is used when None.
            exact_match: Require equality instead of a substring match.

        Returns:
            A list of (x, y, width, height); empty when nothing is found.
        """
        current_threshold = (confidence_threshold if confidence_threshold is not None
                             else self.confidence_threshold)
        text_info_list = self.recognize_text(region)
        if not text_info_list:
            return []
        return self._collect_matches(
            target_text, text_info_list, current_threshold, exact_match, region)

    def find_all_texts(self, region: Optional[Tuple[int, int, int, int]] = None,
                       confidence_threshold: Optional[float] = None
                       ) -> List[Tuple[int, int, int, int]]:
        """Return the rectangle of every recognised text in ``region``.

        Args:
            region: Area to search as (x, y, width, height).
            confidence_threshold: Minimum confidence; the configured value
                is used when None.

        Returns:
            A list of (x, y, width, height); empty when nothing is found.
        """
        current_threshold = (confidence_threshold if confidence_threshold is not None
                             else self.confidence_threshold)
        text_info_list = self.recognize_text(region)
        if not text_info_list:
            return []

        all_texts = []
        for text_info in text_info_list:
            rect = self._qualified_rect(text_info, current_threshold, region, "text")
            if rect is not None:
                all_texts.append(rect)
        print(f"Found {len(all_texts)} texts in the region")
        return all_texts

    def find_closest_text(self, target_position: Tuple[int, int, int, int],
                          text_options: Dict[str, str],
                          region: Optional[Tuple[int, int, int, int]] = None,
                          confidence_threshold: Optional[float] = None,
                          exact_match: bool = False,
                          prefer_side: Optional[str] = None
                          ) -> Optional[Tuple[str, Tuple[int, int, int, int]]]:
        """Find which of several texts lies closest to a target rectangle.

        The screen is captured and recognised once, so all options are
        compared against the same frame.

        Args:
            target_position: Reference rectangle (x, y, width, height).
            text_options: Maps a status name to the text to look for.
            region: Area to search as (x, y, width, height).
            confidence_threshold: Minimum confidence; the configured value
                is used when None.
            exact_match: Require equality instead of a substring match.
            prefer_side: One of 'right', 'left', 'top', 'bottom'. Candidates
                whose center is on the opposite side of the target center
                get 1000 added to their distance.

        Returns:
            (status, rectangle) of the nearest candidate, or None.
        """
        current_threshold = (confidence_threshold if confidence_threshold is not None
                             else self.confidence_threshold)

        target_x, target_y, target_w, target_h = target_position
        target_center_x = target_x + target_w // 2
        target_center_y = target_y + target_h // 2

        text_info_list = self.recognize_text(region)
        if not text_info_list:
            return None

        closest_distance = float('inf')
        closest_status = None
        closest_rect = None
        for status, text in text_options.items():
            text_rects = self._collect_matches(
                text, text_info_list, current_threshold, exact_match, region)
            for rect in text_rects:
                rect_x, rect_y, rect_w, rect_h = rect
                rect_center_x = rect_x + rect_w // 2
                rect_center_y = rect_y + rect_h // 2

                # Euclidean distance between the two centers.
                distance = ((rect_center_x - target_center_x) ** 2
                            + (rect_center_y - target_center_y) ** 2) ** 0.5

                wrong_side = (
                    (prefer_side == 'right' and rect_center_x < target_center_x)
                    or (prefer_side == 'left' and rect_center_x > target_center_x)
                    or (prefer_side == 'top' and rect_center_y > target_center_y)
                    or (prefer_side == 'bottom' and rect_center_y < target_center_y)
                )
                if wrong_side:
                    distance += self._SIDE_PENALTY

                if distance < closest_distance:
                    closest_distance = distance
                    closest_status = status
                    closest_rect = rect

        # Test the rectangle, not the status: a falsy status key is valid.
        if closest_rect is None:
            return None
        print(f"Closest text found: {closest_status} at position: {closest_rect}")
        return closest_status, closest_rect

    def get_search_region(self, range_config: Union[List[float], dict],
                          base_region: Tuple[int, int, int, int]
                          ) -> Tuple[int, int, int, int]:
        """Derive a search rectangle from ``base_region`` and a range config.

        Args:
            range_config: One of
                1. ``[x_percent, y_percent]``: share of the width/height;
                   positive values start at the left/top edge, negative
                   values at the right/bottom edge.
                2. ``{"center": [x_percent, y_percent, width_percent,
                   height_percent]}``: offsets measured from the center of
                   the base region.
                3. ``{"rect": [x, y, width, height]}``: pixel rectangle
                   relative to the base region's origin.
            base_region: Base rectangle (x, y, w, h).

        Returns:
            The search rectangle (x, y, w, h). Any configuration that is not
            recognised, or whose list has the wrong length, yields the base
            region unchanged.
        """
        x, y, w, h = base_region

        if isinstance(range_config, list) and len(range_config) == 2:
            x_percent, y_percent = range_config
            x_percent = min(max(x_percent, -100), 100)
            y_percent = min(max(y_percent, -100), 100)

            search_w = int(w * abs(x_percent) / 100)
            search_h = int(h * abs(y_percent) / 100)
            # Negative percentages anchor the area to the right/bottom edge.
            search_x = x if x_percent >= 0 else x + w - search_w
            search_y = y if y_percent >= 0 else y + h - search_h
            return (search_x, search_y, search_w, search_h)

        if isinstance(range_config, dict) and "center" in range_config:
            center_config = range_config["center"]
            if len(center_config) == 4:
                center_x_percent, center_y_percent, width_percent, height_percent = center_config
                width_percent = min(max(width_percent, 0), 100)
                height_percent = min(max(height_percent, 0), 100)

                search_w = int(w * width_percent / 100)
                search_h = int(h * height_percent / 100)
                offset_x = int(w * center_x_percent / 100)
                offset_y = int(h * center_y_percent / 100)
                search_x = x + w // 2 + offset_x - search_w // 2
                search_y = y + h // 2 + offset_y - search_h // 2
                return self._clamp_to_base(search_x, search_y, search_w, search_h, base_region)
        elif isinstance(range_config, dict) and "rect" in range_config:
            rect_config = range_config["rect"]
            if len(rect_config) == 4:
                rect_x, rect_y, rect_w, rect_h = rect_config
                return self._clamp_to_base(x + rect_x, y + rect_y, rect_w, rect_h, base_region)

        # Unrecognised or malformed configuration: search the whole base region.
        return (x, y, w, h)

    def find_character_coordinates(self, region: Optional[Tuple[int, int, int, int]] = None
                                   ) -> Optional[Tuple[int, int]]:
        """Read the character's coordinates from a "<time>时(x,y)" label.

        A single text holding both the time and the coordinate pair is tried
        first. Otherwise a separate time text and coordinate text are
        paired when the coordinate box starts within 100 pixels of the
        top-right corner of the time box (or both come from the same text).

        Args:
            region: Area to search as (x, y, width, height).

        Returns:
            The parsed (x, y) pair, or None when nothing matches or an
            error occurs.
        """
        start_time = time.time()
        no_box = (0, 0, 0, 0)

        def squeezed(text_info):
            # Dropping spaces makes the patterns more likely to match.
            return text_info.get('text', '').replace(' ', '')

        def box_of(text_info):
            return self._bounding_box(text_info.get('position', [])) or no_box

        try:
            if region:
                print(f"开始查找角色坐标,查找区域: {region}")
            else:
                print("开始查找角色坐标,使用默认全屏区域")

            # recognize_text returns None on failure; treat it as "no text".
            text_info_list = self.recognize_text(region) or []

            print(f"识别到 {len(text_info_list)} 个文本区域")
            for i, text_info in enumerate(text_info_list):
                text = text_info.get('text', '')
                confidence = text_info.get('confidence', 0)
                bbox = box_of(text_info)
                print(f" [{i+1}] 文本: '{text}',置信度: {confidence:.2f},位置: {bbox}")

            if not text_info_list:
                print("未识别到任何文本,无法查找角色坐标")
                return None

            # Pass 1: time and coordinates inside one text.
            print("\n尝试匹配完整坐标格式...")
            for text_info in text_info_list:
                text = squeezed(text_info)
                confidence = text_info.get('confidence', 0)
                match = self._COORD_PATTERN.search(text)
                if match:
                    x_coord = int(match.group(1))
                    y_coord = int(match.group(2))
                    end_time = time.time()
                    print(f"✓ 找到角色坐标: ({x_coord}, {y_coord})")
                    print(f" 匹配文本: '{text}',置信度: {confidence:.2f}")
                    print(f" 查找耗时: {end_time - start_time:.2f} 秒")
                    return (x_coord, y_coord)

            # Pass 2: time and coordinates recognised as separate texts.
            print("\n未找到完整坐标格式,尝试查找分离的时间和坐标...")

            time_texts = [info for info in text_info_list
                          if self._TIME_PATTERN.search(squeezed(info))]
            print(f"找到 {len(time_texts)} 个时间文本")
            for i, time_info in enumerate(time_texts):
                text = time_info.get('text', '')
                confidence = time_info.get('confidence', 0)
                bbox = box_of(time_info)
                print(f" 时间 [{i+1}]: '{text}',置信度: {confidence:.2f},位置: {bbox}")

            coord_texts = [info for info in text_info_list
                           if self._COORD_ONLY_PATTERN.search(squeezed(info))]
            print(f"找到 {len(coord_texts)} 个坐标文本")
            for i, coord_info in enumerate(coord_texts):
                text = coord_info.get('text', '')
                confidence = coord_info.get('confidence', 0)
                bbox = box_of(coord_info)
                print(f" 坐标 [{i+1}]: '{text}',置信度: {confidence:.2f},位置: {bbox}")

            print("\n检查时间和坐标是否在附近...")
            for time_info in time_texts:
                time_x, time_y, time_w, time_h = box_of(time_info)
                time_text = time_info.get('text', '')
                for coord_info in coord_texts:
                    coord_x, coord_y, coord_w, coord_h = box_of(coord_info)
                    coord_text = coord_info.get('text', '')

                    if coord_info is time_info:
                        # Both parts live in the same text box.
                        distance = 0.0
                    else:
                        # From the time box's top-right corner to the
                        # coordinate box's top-left corner.
                        distance = ((coord_x - (time_x + time_w)) ** 2
                                    + (coord_y - time_y) ** 2) ** 0.5
                    print(f" 时间 '{time_text}' 与坐标 '{coord_text}' 的距离: {distance:.1f} 像素")

                    if distance >= self._NEARBY_DISTANCE:
                        continue
                    # Extract from the same space-stripped text that was
                    # used to select this candidate.
                    match = self._COORD_ONLY_PATTERN.search(squeezed(coord_info))
                    if not match:
                        continue
                    if match.group(1) is not None and match.group(2) is not None:
                        x_coord = int(match.group(1))
                        y_coord = int(match.group(2))
                    elif match.group(3) is not None and match.group(4) is not None:
                        x_coord = int(match.group(3))
                        y_coord = int(match.group(4))
                    else:
                        continue

                    end_time = time.time()
                    print(f"✓ 找到角色坐标 (分离文本): ({x_coord}, {y_coord})")
                    print(f" 时间文本: '{time_text}'")
                    print(f" 坐标文本: '{coord_text}'")
                    print(f" 距离: {distance:.1f} 像素")
                    print(f" 查找耗时: {end_time - start_time:.2f} 秒")
                    return (x_coord, y_coord)

            end_time = time.time()
            print(f"\n✗ 未找到角色坐标")
            print(f" 查找耗时: {end_time - start_time:.2f} 秒")
            return None
        except Exception as e:
            end_time = time.time()
            print(f"\n✗ 查找角色坐标时出错: {e}")
            print(f" 查找耗时: {end_time - start_time:.2f} 秒")
            print(f" 错误详情: {traceback.format_exc()}")
            return None