Files
wow/text_finder.py
2026-03-18 09:04:37 +08:00

678 lines
32 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import base64
import json
import re
import time
import traceback
from typing import Callable, Dict, Iterator, List, Optional, Tuple, Union

import cv2
import numpy as np
import pyautogui
import requests
import win32gui
class TextFinder:
    """Locate text on screen by sending screenshots to a Umi-OCR HTTP service.

    Every OCR entry handled by this class is a dict with three keys:
    ``'text'`` (the recognised string), ``'position'`` (the service's
    four-corner ``box`` flattened to ``[x1, y1, x2, y2, x3, y3, x4, y4]``)
    and ``'confidence'`` (the service's ``score``).
    """

    # Seconds to wait for the OCR service before giving up on a request.
    REQUEST_TIMEOUT = 30
    # Largest pixel distance at which a separate coordinate text is still
    # considered to belong to a time text (see find_character_coordinates).
    MAX_COORD_DISTANCE = 100
    # Distance penalty added to candidates on the non-preferred side.
    SIDE_PENALTY = 1000

    # "<CJK chars>时" followed by two integers.
    # NOTE(review): as written the pattern needs at least one whitespace or
    # '(' after 时 and at least one whitespace or ')' after the digits; since
    # ASCII spaces are stripped before matching, in practice only
    # parenthesised forms such as "寅时(123,123)" match - confirm intended.
    _COORD_PATTERN = re.compile(r'[\u4e00-\u9fa5]+时[\s\(]+(\d+)(?:[\s,.]+)?(\d+)[\s\)]+', re.UNICODE)
    # A bare time label such as "寅时".
    _TIME_PATTERN = re.compile(r'[\u4e00-\u9fa5]+时', re.UNICODE)
    # Either the full form above (groups 1-2) or a bare "(x,y)" (groups 3-4).
    _COORD_ONLY_PATTERN = re.compile(r'[\u4e00-\u9fa5]+时[\s\(]+(\d+)(?:[\s,.]+)?(\d+)[\s\)]+|[\(]+(\d+)(?:[\s,.]+)?(\d+)[\s\)]+', re.UNICODE)

    def __init__(self, umi_url: str = "http://127.0.0.1:1224/api/ocr", config: Optional[dict] = None):
        """Store the service URL and the default confidence threshold.

        Args:
            umi_url (str): Umi-OCR HTTP endpoint.
            config (dict, optional): Configuration dict; the threshold is read
                from ``config['text']['confidence_threshold']`` and defaults
                to 0.7 when the config, the section or the key is missing.
        """
        self.umi_url = umi_url
        # ``or {}`` also covers an explicit ``'text': None`` section.
        text_config = (config or {}).get('text') or {}
        self.confidence_threshold = text_config.get('confidence_threshold', 0.7)
        print(f"TextFinder initialized with Umi-OCR URL: {self.umi_url}")
        print(f"TextFinder confidence threshold: {self.confidence_threshold}")

    # ------------------------------------------------------------------
    # OCR
    # ------------------------------------------------------------------
    def recognize_text(self, region: Optional[Tuple[int, int, int, int]] = None) -> Optional[List[Dict]]:
        """Screenshot a screen region and run OCR on it.

        Args:
            region (Tuple[int, int, int, int], optional): Area to capture as
                (x, y, width, height); passed straight to
                ``pyautogui.screenshot``.

        Returns:
            Optional[List[Dict]]: OCR entries (see class docstring), or None
            when the capture or the recognition fails.
        """
        try:
            screenshot = pyautogui.screenshot(region=region)
            return self.recognize_text_from_image(screenshot)
        except Exception as e:
            print(f"Error during text recognition: {e}")
            return None

    def recognize_text_from_image(self, image) -> Optional[List[Dict]]:
        """Run OCR on an image and normalise the service response.

        Args:
            image: PIL image (anything ``np.array`` turns into an RGB array).

        Returns:
            Optional[List[Dict]]: OCR entries (see class docstring), or None
            when encoding, the HTTP request or response parsing fails, or
            when the service does not return a list of results.
        """
        try:
            # The array is in RGB order; OpenCV's encoder expects BGR.
            img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            encoded_ok, buf = cv2.imencode('.png', img)
            if not encoded_ok:
                raise RuntimeError("PNG encoding failed")
            b64 = base64.b64encode(buf).decode()
            payload = {"base64": b64, "options": {"data.format": "json"}}
            # A timeout keeps a stalled OCR service from blocking forever.
            r = requests.post(self.umi_url, data=json.dumps(payload),
                              headers={"Content-Type": "application/json"},
                              timeout=self.REQUEST_TIMEOUT)
            print(f"Response status code: {r.status_code}")
            # Expected: list[{"box": [[x1,y1],...], "score": ..., "text": ..., "end": ...}]
            res = r.json()["data"]
            if not isinstance(res, list):
                # Umi-OCR is understood to put a message string here when it
                # finds no text or fails; report it instead of iterating it.
                print(f"Error during text recognition: {res}")
                return None
            text_info_list = [
                {
                    'text': item["text"],
                    # Flatten [[x1, y1], ...] into [x1, y1, x2, y2, ...].
                    'position': [coord for point in item["box"] for coord in point],
                    'confidence': item["score"],
                }
                for item in res
            ]
            print(f"Recognized text: {[info['text'] for info in text_info_list]}")
            return text_info_list
        except Exception as e:
            print(f"Error during text recognition: {e}")
            return None

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _bounding_rect(position, region: Optional[Tuple[int, int, int, int]] = None) -> Tuple[int, int, int, int]:
        """Return the axis-aligned (x, y, width, height) of a flat corner list.

        When ``region`` is given, its x/y origin is added so the rectangle is
        expressed in the coordinate space the region was cut from.
        """
        x_coords = position[0::2]
        y_coords = position[1::2]
        x = min(x_coords)
        y = min(y_coords)
        width = max(x_coords) - x
        height = max(y_coords) - y
        if region:
            x += region[0]
            y += region[1]
        return (x, y, width, height)

    @staticmethod
    def _text_matcher(target_text: str, exact_match: bool) -> Callable[[str], bool]:
        """Build the predicate deciding whether an OCR text matches the target."""
        if exact_match:
            return lambda text: text == target_text
        return lambda text: target_text in text

    def _iter_rects(self, text_info_list: List[Dict], threshold: float,
                    region: Optional[Tuple[int, int, int, int]], label: str,
                    accepts: Optional[Callable[[str], bool]] = None) -> Iterator[Tuple[int, int, int, int]]:
        """Yield the rectangle of every entry that passes all checks.

        An entry must satisfy ``accepts`` (when given), reach ``threshold``
        and carry at least eight position values. Entries rejected for
        confidence or position are reported with a message starting with
        ``label``. The generator is lazy, so a caller that stops at the first
        hit does not trigger messages for later entries.
        """
        for info in text_info_list:
            if accepts is not None and not accepts(info.get('text', '')):
                continue
            confidence = info.get('confidence', 0)
            position = info.get('position', [])
            if confidence >= threshold:
                if len(position) >= 8:
                    yield self._bounding_rect(position, region)
                else:
                    print(f"{label} but position information is insufficient")
            else:
                print(f"{label} but confidence is too low: {confidence:.2f} (required: {threshold:.2f})")

    def _entry_bbox(self, text_info: Dict) -> Tuple[int, int, int, int]:
        """Return an entry's (x, y, w, h), or (0, 0, 0, 0) without usable position."""
        position = text_info.get('position', [])
        if len(position) >= 8:
            return self._bounding_rect(position)
        return (0, 0, 0, 0)

    @staticmethod
    def _clamp_to_base(search_x, search_y, search_w, search_h,
                       base_region: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
        """Pull a rectangle's origin and size back inside ``base_region``."""
        x, y, w, h = base_region
        search_x = max(search_x, x)
        search_y = max(search_y, y)
        # max(0, ...) prevents a negative size when the origin lies beyond
        # the far edge of the base region.
        search_w = max(0, min(search_w, x + w - search_x))
        search_h = max(0, min(search_h, y + h - search_y))
        return (search_x, search_y, search_w, search_h)

    # ------------------------------------------------------------------
    # Text lookup
    # ------------------------------------------------------------------
    def screen_ocr_text_pos(self, win_title: str, target_text: str, thresh: float = 0.8) -> Optional[Tuple[int, int, int, int]]:
        """Find text inside a window and return its centre and size.

        Args:
            win_title (str): Exact title of the window to capture.
            target_text (str): Substring to look for in the OCR results.
            thresh (float): Minimum confidence for a result to count.

        Returns:
            Optional[Tuple[int, int, int, int]]: (center_x, center_y, width,
            height) of the first match, with the centre offset by the
            window's top-left corner, or None when the window or the text is
            not found.
        """
        hwnd = win32gui.FindWindow(None, win_title)
        if hwnd == 0:
            return None
        left, top, right, bottom = win32gui.GetWindowRect(hwnd)
        text_info_list = self.recognize_text((left, top, right - left, bottom - top))
        if not text_info_list:
            return None
        for text_info in text_info_list:
            text = text_info.get('text', '')
            confidence = text_info.get('confidence', 0)
            position = text_info.get('position', [])
            if target_text in text and confidence >= thresh and len(position) >= 8:
                # cv2.boundingRect only accepts 32-bit point sets; a plain
                # ``int`` dtype can be 64-bit depending on platform/NumPy.
                corners = np.array(position, dtype=np.int32).reshape(-1, 2)
                x, y, w, h = cv2.boundingRect(corners)
                return (x + w // 2 + left, y + h // 2 + top, w, h)
        return None

    def find_text(self, target_text: str, region: Optional[Tuple[int, int, int, int]] = None,
                  confidence_threshold: Optional[float] = None, exact_match: bool = False) -> Optional[Tuple[int, int, int, int]]:
        """Find the first occurrence of a text on screen.

        Args:
            target_text (str): Text to look for.
            region (Tuple[int, int, int, int], optional): Search area as
                (x, y, width, height).
            confidence_threshold (float, optional): Minimum confidence;
                defaults to the configured threshold.
            exact_match (bool, optional): Require equality instead of the
                default substring match.

        Returns:
            Optional[Tuple[int, int, int, int]]: (x, y, width, height) of the
            first match, or None when nothing matches.
        """
        current_threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
        text_info_list = self.recognize_text(region)
        return self._find_text_from_info_list(target_text, text_info_list, current_threshold, exact_match, region)

    def find_text_from_image(self, target_text: str, image,
                             confidence_threshold: Optional[float] = None, exact_match: bool = False) -> Optional[Tuple[int, int, int, int]]:
        """Find the first occurrence of a text in a given image.

        Args:
            target_text (str): Text to look for.
            image: PIL image to run OCR on.
            confidence_threshold (float, optional): Minimum confidence;
                defaults to the configured threshold.
            exact_match (bool, optional): Require equality instead of the
                default substring match.

        Returns:
            Optional[Tuple[int, int, int, int]]: (x, y, width, height) in
            image coordinates, or None when nothing matches.
        """
        current_threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
        text_info_list = self.recognize_text_from_image(image)
        return self._find_text_from_info_list(target_text, text_info_list, current_threshold, exact_match, None)

    def _find_text_from_info_list(self, target_text: str, text_info_list: Optional[List[Dict]],
                                  current_threshold: float, exact_match: bool,
                                  region: Optional[Tuple[int, int, int, int]]) -> Optional[Tuple[int, int, int, int]]:
        """Return the rectangle of the first entry matching the target text.

        Args:
            target_text (str): Text to look for.
            text_info_list (Optional[List[Dict]]): OCR entries; None or empty
                yields None.
            current_threshold (float): Minimum confidence.
            exact_match (bool): Require equality instead of substring match.
            region (Optional[Tuple[int, int, int, int]]): Region the entries
                came from; its origin is added to the result.

        Returns:
            Optional[Tuple[int, int, int, int]]: (x, y, width, height), or
            None when no entry passes every check.
        """
        if not text_info_list:
            return None
        rect = next(
            self._iter_rects(text_info_list, current_threshold, region,
                             f"Found text '{target_text}'",
                             self._text_matcher(target_text, exact_match)),
            None,
        )
        if rect is not None:
            x, y, width, height = rect
            print(f"Found text '{target_text}' at position: ({x}, {y}, {width}, {height})")
        return rect

    def find_texts(self, target_text: str, region: Optional[Tuple[int, int, int, int]] = None,
                   confidence_threshold: Optional[float] = None, exact_match: bool = False) -> List[Tuple[int, int, int, int]]:
        """Find every occurrence of a text on screen.

        Args:
            target_text (str): Text to look for.
            region (Tuple[int, int, int, int], optional): Search area as
                (x, y, width, height).
            confidence_threshold (float, optional): Minimum confidence;
                defaults to the configured threshold.
            exact_match (bool, optional): Require equality instead of the
                default substring match.

        Returns:
            List[Tuple[int, int, int, int]]: (x, y, width, height) of every
            match; empty when nothing is recognised or matches.
        """
        current_threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
        text_info_list = self.recognize_text(region)
        if not text_info_list:
            return []
        found_texts = list(
            self._iter_rects(text_info_list, current_threshold, region,
                             f"Found text '{target_text}'",
                             self._text_matcher(target_text, exact_match))
        )
        print(f"Found {len(found_texts)} instances of '{target_text}' at positions: {found_texts}")
        return found_texts

    def find_all_texts(self, region: Optional[Tuple[int, int, int, int]] = None,
                       confidence_threshold: Optional[float] = None) -> List[Tuple[int, int, int, int]]:
        """Return the rectangles of all sufficiently confident texts.

        Args:
            region (Tuple[int, int, int, int], optional): Search area as
                (x, y, width, height).
            confidence_threshold (float, optional): Minimum confidence;
                defaults to the configured threshold.

        Returns:
            List[Tuple[int, int, int, int]]: (x, y, width, height) of each
            accepted text; empty when nothing is recognised.
        """
        current_threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
        text_info_list = self.recognize_text(region)
        if not text_info_list:
            return []
        all_texts = list(self._iter_rects(text_info_list, current_threshold, region, "Found text"))
        print(f"Found {len(all_texts)} texts in the region")
        return all_texts

    def find_closest_text(self, target_position: Tuple[int, int, int, int],
                          text_options: Dict[str, str],
                          region: Optional[Tuple[int, int, int, int]] = None,
                          confidence_threshold: Optional[float] = None,
                          exact_match: bool = False,
                          prefer_side: Optional[str] = None) -> Optional[Tuple[str, Tuple[int, int, int, int]]]:
        """Pick, among several candidate texts, the one nearest to a target.

        The region is captured and recognised once, and every option is
        matched against that single result so all candidates come from the
        same frame.

        Args:
            target_position (Tuple[int, int, int, int]): Reference rectangle
                (x, y, width, height); distances are measured from its centre.
            text_options (Dict[str, str]): Maps a status name to the text
                that represents it.
            region (Tuple[int, int, int, int], optional): Search area as
                (x, y, width, height).
            confidence_threshold (float, optional): Minimum confidence;
                defaults to the configured threshold.
            exact_match (bool, optional): Require equality instead of the
                default substring match.
            prefer_side (str, optional): 'right', 'left', 'top' or 'bottom';
                candidates on the opposite side get a distance penalty.

        Returns:
            Optional[Tuple[str, Tuple[int, int, int, int]]]: (status,
            rectangle) of the nearest candidate, or None when none is found.
        """
        current_threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
        target_x, target_y, target_w, target_h = target_position
        target_center_x = target_x + target_w // 2
        target_center_y = target_y + target_h // 2

        text_info_list = self.recognize_text(region)
        if not text_info_list:
            return None

        closest_distance = float('inf')
        closest_status = None
        closest_rect = None
        for status, text in text_options.items():
            text_rects = list(
                self._iter_rects(text_info_list, current_threshold, region,
                                 f"Found text '{text}'",
                                 self._text_matcher(text, exact_match))
            )
            print(f"Found {len(text_rects)} instances of '{text}' at positions: {text_rects}")
            for rect in text_rects:
                rect_x, rect_y, rect_w, rect_h = rect
                rect_center_x = rect_x + rect_w // 2
                rect_center_y = rect_y + rect_h // 2
                distance = ((rect_center_x - target_center_x) ** 2 + (rect_center_y - target_center_y) ** 2) ** 0.5
                # Candidates lying on the opposite side of the preferred one
                # are pushed back rather than excluded.
                on_wrong_side = (
                    (prefer_side == 'right' and rect_center_x < target_center_x)
                    or (prefer_side == 'left' and rect_center_x > target_center_x)
                    or (prefer_side == 'top' and rect_center_y > target_center_y)
                    or (prefer_side == 'bottom' and rect_center_y < target_center_y)
                )
                if on_wrong_side:
                    distance += self.SIDE_PENALTY
                if distance < closest_distance:
                    closest_distance = distance
                    closest_status = status
                    closest_rect = rect

        # Test the rectangle, not the key: an empty-string status is valid.
        if closest_rect is not None:
            print(f"Closest text found: {closest_status} at position: {closest_rect}")
            return closest_status, closest_rect
        return None

    def get_search_region(self, range_config: Union[List[float], dict], base_region: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
        """Derive a search rectangle from a range configuration.

        Args:
            range_config: One of
                1. ``[x_percent, y_percent]`` - share of the base region;
                   positive values start at the left/top edge, negative
                   values at the right/bottom edge (clamped to +/-100).
                2. ``{"center": [x_percent, y_percent, width_percent,
                   height_percent]}`` - offsets measured from the base
                   region's centre, sizes as a share of the base region.
                3. ``{"rect": [x, y, width, height]}`` - pixel rectangle
                   relative to the base region's origin.
            base_region: Base rectangle (x, y, w, h).

        Returns:
            Tuple[int, int, int, int]: Search rectangle (x, y, w, h). Any
            unrecognised or malformed configuration yields the base region.
        """
        x, y, w, h = base_region

        if isinstance(range_config, (list, tuple)) and len(range_config) == 2:
            x_percent, y_percent = (min(max(p, -100), 100) for p in range_config)
            search_w = int(w * abs(x_percent) / 100)
            search_h = int(h * abs(y_percent) / 100)
            # Negative percentages anchor the box to the far edge.
            search_x = x if x_percent >= 0 else x + w - search_w
            search_y = y if y_percent >= 0 else y + h - search_h
            return (search_x, search_y, search_w, search_h)

        if isinstance(range_config, dict) and "center" in range_config:
            center_config = range_config["center"]
            if isinstance(center_config, (list, tuple)) and len(center_config) == 4:
                center_x_percent, center_y_percent, width_percent, height_percent = center_config
                width_percent = min(max(width_percent, 0), 100)
                height_percent = min(max(height_percent, 0), 100)
                search_w = int(w * width_percent / 100)
                search_h = int(h * height_percent / 100)
                offset_x = int(w * center_x_percent / 100)
                offset_y = int(h * center_y_percent / 100)
                search_x = x + w // 2 + offset_x - search_w // 2
                search_y = y + h // 2 + offset_y - search_h // 2
                return self._clamp_to_base(search_x, search_y, search_w, search_h, base_region)
        elif isinstance(range_config, dict) and "rect" in range_config:
            rect_config = range_config["rect"]
            if isinstance(rect_config, (list, tuple)) and len(rect_config) == 4:
                rect_x, rect_y, rect_w, rect_h = rect_config
                return self._clamp_to_base(x + rect_x, y + rect_y, rect_w, rect_h, base_region)

        # Unknown or malformed configuration: search the whole base region.
        return (x, y, w, h)

    # ------------------------------------------------------------------
    # Character coordinates
    # ------------------------------------------------------------------
    def find_character_coordinates(self, region: Optional[Tuple[int, int, int, int]] = None) -> Optional[Tuple[int, int]]:
        """Read the character's coordinates from on-screen text.

        A single text containing both the time label and the coordinates is
        tried first. Failing that, a time text and a separate coordinate text
        whose boxes lie within ``MAX_COORD_DISTANCE`` pixels are combined.

        Args:
            region (Tuple[int, int, int, int], optional): Search area as
                (x, y, width, height).

        Returns:
            Optional[Tuple[int, int]]: The parsed (x, y), or None when no
            text is recognised, nothing matches, or an error occurs.
        """
        start_time = time.time()
        try:
            if region:
                print(f"开始查找角色坐标,查找区域: {region}")
            else:
                print("开始查找角色坐标,使用默认全屏区域")

            text_info_list = self.recognize_text(region)
            # Checked before len(): recognize_text returns None on failure.
            if not text_info_list:
                print("未识别到任何文本,无法查找角色坐标")
                return None

            print(f"识别到 {len(text_info_list)} 个文本区域")
            for i, text_info in enumerate(text_info_list, 1):
                text = text_info.get('text', '')
                confidence = text_info.get('confidence', 0)
                bbox = self._entry_bbox(text_info)
                print(f" [{i}] 文本: '{text}',置信度: {confidence:.2f},位置: {bbox}")

            coords = self._coords_from_single_text(text_info_list)
            if coords is None:
                coords = self._coords_from_split_texts(text_info_list)
            if coords is None:
                print(f"\n✗ 未找到角色坐标")
            print(f" 查找耗时: {time.time() - start_time:.2f}")
            return coords
        except Exception as e:
            print(f"\n✗ 查找角色坐标时出错: {e}")
            print(f" 查找耗时: {time.time() - start_time:.2f}")
            print(f" 错误详情: {traceback.format_exc()}")
            return None

    @staticmethod
    def _compact_text(text_info: Dict) -> str:
        """Return an entry's text with ASCII spaces removed to ease matching."""
        return text_info.get('text', '').replace(' ', '')

    def _coords_from_single_text(self, text_info_list: List[Dict]) -> Optional[Tuple[int, int]]:
        """Parse coordinates from the first text holding time and coordinates."""
        print("\n尝试匹配完整坐标格式...")
        for text_info in text_info_list:
            text = self._compact_text(text_info)
            match = self._COORD_PATTERN.search(text)
            if match:
                x_coord = int(match.group(1))
                y_coord = int(match.group(2))
                confidence = text_info.get('confidence', 0)
                print(f"✓ 找到角色坐标: ({x_coord}, {y_coord})")
                print(f" 匹配文本: '{text}',置信度: {confidence:.2f}")
                return (x_coord, y_coord)
        return None

    def _coords_from_split_texts(self, text_info_list: List[Dict]) -> Optional[Tuple[int, int]]:
        """Combine a time text with a nearby, separately recognised coordinate text."""
        print("\n未找到完整坐标格式,尝试查找分离的时间和坐标...")

        time_texts = [info for info in text_info_list
                      if self._TIME_PATTERN.search(self._compact_text(info))]
        print(f"找到 {len(time_texts)} 个时间文本")
        for i, time_info in enumerate(time_texts, 1):
            text = time_info.get('text', '')
            confidence = time_info.get('confidence', 0)
            bbox = self._entry_bbox(time_info)
            print(f" 时间 [{i}]: '{text}',置信度: {confidence:.2f},位置: {bbox}")

        # Keep the match from the space-stripped text so the pairing step
        # parses exactly what the filter accepted.
        coord_texts = []
        for text_info in text_info_list:
            match = self._COORD_ONLY_PATTERN.search(self._compact_text(text_info))
            if match:
                coord_texts.append((text_info, match))
        print(f"找到 {len(coord_texts)} 个坐标文本")
        for i, (coord_info, _) in enumerate(coord_texts, 1):
            text = coord_info.get('text', '')
            confidence = coord_info.get('confidence', 0)
            bbox = self._entry_bbox(coord_info)
            print(f" 坐标 [{i}]: '{text}',置信度: {confidence:.2f},位置: {bbox}")

        print("\n检查时间和坐标是否在附近...")
        for time_info in time_texts:
            time_x, time_y, time_w, _ = self._entry_bbox(time_info)
            time_text = time_info.get('text', '')
            for coord_info, match in coord_texts:
                coord_x, coord_y, _, _ = self._entry_bbox(coord_info)
                coord_text = coord_info.get('text', '')
                # Distance from the time box's top-right corner to the
                # coordinate box's top-left corner.
                distance = ((coord_x - (time_x + time_w)) ** 2 + (coord_y - time_y) ** 2) ** 0.5
                print(f" 时间 '{time_text}' 与坐标 '{coord_text}' 的距离: {distance:.1f} 像素")
                if not distance < self.MAX_COORD_DISTANCE:
                    continue
                # Groups 1-2 belong to the time-prefixed form, 3-4 to "(x,y)".
                if match.group(1) and match.group(2):
                    x_coord = int(match.group(1))
                    y_coord = int(match.group(2))
                elif match.group(3) and match.group(4):
                    x_coord = int(match.group(3))
                    y_coord = int(match.group(4))
                else:
                    continue
                print(f"✓ 找到角色坐标 (分离文本): ({x_coord}, {y_coord})")
                print(f" 时间文本: '{time_text}'")
                print(f" 坐标文本: '{coord_text}'")
                print(f" 距离: {distance:.1f} 像素")
                return (x_coord, y_coord)
        return None