first commit

This commit is contained in:
王鹏
2026-03-18 09:04:37 +08:00
commit b7719b377d
121 changed files with 116104 additions and 0 deletions

678
text_finder.py Normal file
View File

@@ -0,0 +1,678 @@
import base64
import json
import re
from typing import Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
import pyautogui
import requests
import win32gui
class TextFinder:
"""文字识别类,用于通过 Umi-OCR 查找屏幕上的文字位置"""
def __init__(self, umi_url: str = "http://127.0.0.1:1224/api/ocr", config: Optional[dict] = None):
    """Store the OCR endpoint and resolve the confidence threshold.

    Args:
        umi_url (str): URL of the Umi-OCR HTTP service.
        config (dict, optional): Configuration dictionary. The threshold is
            read from ``config['text']['confidence_threshold']``.
    """
    self.umi_url = umi_url
    # The threshold falls back to 0.7 when no config is supplied or when the
    # 'text' section / 'confidence_threshold' key is absent.
    if config:
        text_section = config.get('text', {})
        self.confidence_threshold = text_section.get('confidence_threshold', 0.7)
    else:
        self.confidence_threshold = 0.7
    print(f"TextFinder initialized with Umi-OCR URL: {self.umi_url}")
    print(f"TextFinder confidence threshold: {self.confidence_threshold}")
def recognize_text(self, region: Optional[Tuple[int, int, int, int]] = None) -> Optional[List[Dict]]:
    """Capture the screen (or a sub-region of it) and run OCR on the capture.

    Args:
        region (Tuple[int, int, int, int], optional): Capture area as
            (x, y, width, height); passed straight to ``pyautogui.screenshot``.

    Returns:
        Optional[List[Dict]]: Whatever ``recognize_text_from_image`` returns
        for the capture, or None if taking the screenshot or the recognition
        call raised.
    """
    try:
        captured = pyautogui.screenshot(region=region)
        ocr_entries = self.recognize_text_from_image(captured)
    except Exception as e:
        # Best-effort: report the failure and signal "nothing recognised".
        print(f"Error during text recognition: {e}")
        ocr_entries = None
    return ocr_entries
def recognize_text_from_image(self, image, timeout: float = 30.0) -> Optional[List[Dict]]:
    """Send an image to the Umi-OCR HTTP service and return the text entries.

    Args:
        image: PIL image object.
        timeout (float): Seconds to wait for the OCR service before giving
            up. Without a timeout a stalled service would block forever.

    Returns:
        Optional[List[Dict]]: One dict per recognised entry with the keys
        ``'text'``, ``'position'`` (the corner points flattened to
        ``[x1, y1, x2, y2, x3, y3, x4, y4]``) and ``'confidence'``.
        None if encoding, the request, or response parsing failed, or if the
        service did not return a list of entries.
    """
    try:
        # Screenshots are RGB already, but caller-supplied images may be
        # RGBA / palette / greyscale, which the colour conversion below
        # cannot handle.
        if hasattr(image, "convert"):
            image = image.convert("RGB")
        # PIL delivers RGB; OpenCV's encoder expects BGR channel order.
        img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        encoded_ok, buf = cv2.imencode('.png', img)
        if not encoded_ok:
            print("Error during text recognition: failed to encode image as PNG")
            return None
        b64 = base64.b64encode(buf).decode()
        # NOTE(review): "data.format": "json" is sent as-is; confirm against
        # the Umi-OCR API docs that this is an accepted value.
        payload = {"base64": b64, "options": {"data.format": "json"}}
        r = requests.post(self.umi_url, data=json.dumps(payload),
                          headers={"Content-Type": "application/json"},
                          timeout=timeout)
        print(f"Response status code: {r.status_code}")
        # Expected shape: list[{"box": [[x1, y1], ...], "score": ..., "text": ..., "end": ...}]
        res = r.json()["data"]
        if not isinstance(res, list):
            # Presumably the service puts a message string here when nothing
            # was recognised or the request failed; there are no entries to
            # iterate in that case.
            print(f"Error during text recognition: unexpected OCR response data: {res}")
            return None
        text_info_list = []
        for item in res:
            # Flatten the corner points [[x1, y1], [x2, y2], ...] into one list.
            position = [coord for point in item["box"] for coord in point]
            text_info_list.append({
                'text': item["text"],
                'position': position,
                'confidence': item["score"],
            })
        print(f"Recognized text: {[info['text'] for info in text_info_list]}")
        return text_info_list
    except Exception as e:
        print(f"Error during text recognition: {e}")
        return None
def screen_ocr_text_pos(self, win_title: str, target_text: str, thresh: float = 0.8) -> Optional[Tuple[int, int, int, int]]:
    """Locate text inside a window and return its centre point and size.

    Args:
        win_title (str): Exact title of the window to capture.
        target_text (str): Text to look for (substring match).
        thresh (float): Minimum confidence an entry must have.

    Returns:
        Optional[Tuple[int, int, int, int]]: ``(center_x, center_y, w, h)``
        of the first matching entry, where the centre is offset by the
        window's top-left corner and ``w``/``h`` are the bounding-box size.
        None if the window is not found, OCR yields nothing, or no entry
        matches.
    """
    hwnd = win32gui.FindWindow(None, win_title)
    if hwnd == 0:
        return None
    left, top, right, bottom = win32gui.GetWindowRect(hwnd)
    # OCR positions are relative to the captured region, i.e. to the window.
    text_info_list = self.recognize_text((left, top, right - left, bottom - top))
    if not text_info_list:
        return None
    for text_info in text_info_list:
        text = text_info.get('text', '')
        confidence = text_info.get('confidence', 0)
        position = text_info.get('position', [])
        if target_text in text and confidence >= thresh and len(position) >= 8:
            # Re-pair the flat [x1, y1, x2, y2, ...] list into points; a
            # dangling odd element is ignored instead of raising IndexError.
            points = [[position[i], position[i + 1]]
                      for i in range(0, len(position) - 1, 2)]
            # cv2.boundingRect needs 32-bit points; plain ``int`` maps to
            # int64 on NumPy 2 (and on non-Windows platforms).
            box = np.array(points, dtype=np.int32)
            x, y, w, h = cv2.boundingRect(box)
            return (x + w // 2 + left, y + h // 2 + top, w, h)
    return None
def find_text(self, target_text: str, region: Optional[Tuple[int, int, int, int]] = None,
              confidence_threshold: Optional[float] = None, exact_match: bool = False) -> Optional[Tuple[int, int, int, int]]:
    """Find the first occurrence of a text on screen.

    Args:
        target_text (str): Text to look for.
        region (Tuple[int, int, int, int], optional): Search area as
            (x, y, width, height).
        confidence_threshold (float, optional): Minimum confidence; the
            instance's configured threshold is used when this is None.
        exact_match (bool, optional): Require equality instead of the
            default substring match.

    Returns:
        Optional[Tuple[int, int, int, int]]: (x, y, width, height) of the
        match, or None when nothing was found.
    """
    # An explicit threshold (including 0) wins over the configured one.
    if confidence_threshold is None:
        threshold = self.confidence_threshold
    else:
        threshold = confidence_threshold
    ocr_entries = self.recognize_text(region)
    return self._find_text_from_info_list(target_text, ocr_entries, threshold, exact_match, region)
def find_text_from_image(self, target_text: str, image,
                         confidence_threshold: Optional[float] = None, exact_match: bool = False) -> Optional[Tuple[int, int, int, int]]:
    """Find the first occurrence of a text inside a given image.

    Args:
        target_text (str): Text to look for.
        image: PIL image object.
        confidence_threshold (float, optional): Minimum confidence; the
            instance's configured threshold is used when this is None.
        exact_match (bool, optional): Require equality instead of the
            default substring match.

    Returns:
        Optional[Tuple[int, int, int, int]]: (x, y, width, height) of the
        match in image coordinates, or None when nothing was found.
    """
    # An explicit threshold (including 0) wins over the configured one.
    if confidence_threshold is None:
        threshold = self.confidence_threshold
    else:
        threshold = confidence_threshold
    ocr_entries = self.recognize_text_from_image(image)
    # No region offset applies: positions stay relative to the image.
    return self._find_text_from_info_list(target_text, ocr_entries, threshold, exact_match, None)
def _find_text_from_info_list(self, target_text: str, text_info_list: Optional[List[Dict]],
                              current_threshold: float, exact_match: bool,
                              region: Optional[Tuple[int, int, int, int]]) -> Optional[Tuple[int, int, int, int]]:
    """Pick the first OCR entry that matches the target text.

    Args:
        target_text (str): Text to look for.
        text_info_list (Optional[List[Dict]]): OCR entries with the keys
            'text', 'confidence' and 'position'.
        current_threshold (float): Minimum confidence an entry must have.
        exact_match (bool): Require equality instead of a substring match.
        region (Optional[Tuple[int, int, int, int]]): Region the entries were
            recognised in; its x/y are added to the result when given.

    Returns:
        Optional[Tuple[int, int, int, int]]: (x, y, width, height) of the
        first acceptable match, or None when there is none.
    """
    if not text_info_list:
        return None
    for entry in text_info_list:
        candidate = entry.get('text', '')
        score = entry.get('confidence', 0)
        corners = entry.get('position', [])
        if exact_match:
            is_match = candidate == target_text
        else:
            is_match = target_text in candidate
        if not is_match:
            continue
        if not (score >= current_threshold):
            print(f"Found text '{target_text}' but confidence is too low: {score:.2f} (required: {current_threshold:.2f})")
            continue
        if len(corners) < 8:
            print(f"Found text '{target_text}' but position information is insufficient")
            continue
        # Axis-aligned bounding box of the corner points: even indices are
        # x coordinates, odd indices are y coordinates.
        xs = corners[0::2]
        ys = corners[1::2]
        x = min(xs)
        y = min(ys)
        width = max(xs) - x
        height = max(ys) - y
        # Shift by the region's origin when a region was given.
        if region:
            x += region[0]
            y += region[1]
        print(f"Found text '{target_text}' at position: ({x}, {y}, {width}, {height})")
        return (x, y, width, height)
    return None
def find_texts(self, target_text: str, region: Optional[Tuple[int, int, int, int]] = None,
               confidence_threshold: Optional[float] = None, exact_match: bool = False) -> List[Tuple[int, int, int, int]]:
    """Find every occurrence of a text on screen.

    Args:
        target_text (str): Text to look for.
        region (Tuple[int, int, int, int], optional): Search area as
            (x, y, width, height).
        confidence_threshold (float, optional): Minimum confidence; the
            instance's configured threshold is used when this is None.
        exact_match (bool, optional): Require equality instead of the
            default substring match.

    Returns:
        List[Tuple[int, int, int, int]]: (x, y, width, height) for every
        acceptable match in recognition order; empty when nothing was found.
    """
    if confidence_threshold is None:
        threshold = self.confidence_threshold
    else:
        threshold = confidence_threshold
    ocr_entries = self.recognize_text(region)
    if not ocr_entries:
        return []
    matches = []
    for entry in ocr_entries:
        candidate = entry.get('text', '')
        score = entry.get('confidence', 0)
        corners = entry.get('position', [])
        if exact_match:
            is_match = candidate == target_text
        else:
            is_match = target_text in candidate
        if not is_match:
            continue
        if not (score >= threshold):
            print(f"Found text '{target_text}' but confidence is too low: {score:.2f} (required: {threshold:.2f})")
            continue
        if len(corners) < 8:
            print(f"Found text '{target_text}' but position information is insufficient")
            continue
        # Axis-aligned bounding box of the corner points.
        xs = corners[0::2]
        ys = corners[1::2]
        x = min(xs)
        y = min(ys)
        width = max(xs) - x
        height = max(ys) - y
        # Shift by the region's origin when a region was given.
        if region:
            x += region[0]
            y += region[1]
        matches.append((x, y, width, height))
    print(f"Found {len(matches)} instances of '{target_text}' at positions: {matches}")
    return matches
def find_all_texts(self, region: Optional[Tuple[int, int, int, int]] = None,
                   confidence_threshold: Optional[float] = None) -> List[Tuple[int, int, int, int]]:
    """Return the bounding boxes of all text recognised in a region.

    Args:
        region (Tuple[int, int, int, int], optional): Search area as
            (x, y, width, height).
        confidence_threshold (float, optional): Minimum confidence; the
            instance's configured threshold is used when this is None.

    Returns:
        List[Tuple[int, int, int, int]]: (x, y, width, height) for every
        entry that meets the threshold; empty when nothing was recognised.
    """
    if confidence_threshold is None:
        threshold = self.confidence_threshold
    else:
        threshold = confidence_threshold
    ocr_entries = self.recognize_text(region)
    if not ocr_entries:
        return []
    boxes = []
    for entry in ocr_entries:
        score = entry.get('confidence', 0)
        corners = entry.get('position', [])
        if not (score >= threshold):
            print(f"Found text but confidence is too low: {score:.2f} (required: {threshold:.2f})")
            continue
        if len(corners) < 8:
            print(f"Found text but position information is insufficient")
            continue
        # Axis-aligned bounding box of the corner points.
        xs = corners[0::2]
        ys = corners[1::2]
        x = min(xs)
        y = min(ys)
        width = max(xs) - x
        height = max(ys) - y
        # Shift by the region's origin when a region was given.
        if region:
            x += region[0]
            y += region[1]
        boxes.append((x, y, width, height))
    print(f"Found {len(boxes)} texts in the region")
    return boxes
def find_closest_text(self, target_position: Tuple[int, int, int, int],
                      text_options: Dict[str, str],
                      region: Optional[Tuple[int, int, int, int]] = None,
                      confidence_threshold: Optional[float] = None,
                      exact_match: bool = False,
                      prefer_side: Optional[str] = None) -> Optional[Tuple[str, Tuple[int, int, int, int]]]:
    """Find which of several candidate texts lies closest to a target box.

    Args:
        target_position (Tuple[int, int, int, int]): Reference box as
            (x, y, width, height).
        text_options (Dict[str, str]): Maps a status name to the text to
            look for.
        region (Tuple[int, int, int, int], optional): Search area as
            (x, y, width, height).
        confidence_threshold (float, optional): Minimum confidence; the
            instance's configured threshold is used when this is None.
        exact_match (bool, optional): Require equality instead of the
            default substring match.
        prefer_side (str, optional): One of 'right', 'left', 'top',
            'bottom'. Candidates on the opposite side of the target get a
            1000-pixel distance penalty; any other value has no effect.

    Returns:
        Optional[Tuple[str, Tuple[int, int, int, int]]]: ``(status, rect)``
        of the nearest candidate (centre-to-centre distance), or None when
        no candidate text was found.
    """
    current_threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
    target_x, target_y, target_w, target_h = target_position
    target_center_x = target_x + target_w // 2
    target_center_y = target_y + target_h // 2

    closest_distance = float('inf')
    closest_status = None
    closest_rect = None
    for status, text in text_options.items():
        text_rects = self.find_texts(text, region=region, confidence_threshold=current_threshold, exact_match=exact_match)
        for rect in text_rects:
            rect_x, rect_y, rect_w, rect_h = rect
            rect_center_x = rect_x + rect_w // 2
            rect_center_y = rect_y + rect_h // 2
            # Euclidean distance between the two centres.
            distance = ((rect_center_x - target_center_x) ** 2 + (rect_center_y - target_center_y) ** 2) ** 0.5
            # Penalise candidates lying on the non-preferred side.
            on_wrong_side = (
                (prefer_side == 'right' and rect_center_x < target_center_x)
                or (prefer_side == 'left' and rect_center_x > target_center_x)
                or (prefer_side == 'top' and rect_center_y > target_center_y)
                or (prefer_side == 'bottom' and rect_center_y < target_center_y)
            )
            if on_wrong_side:
                distance += 1000
            if distance < closest_distance:
                closest_distance = distance
                closest_status = status
                closest_rect = rect
    # Test the rect for None rather than truthiness of the status, so that a
    # falsy status key (e.g. an empty string) is still reported.
    if closest_rect is None:
        return None
    print(f"Closest text found: {closest_status} at position: {closest_rect}")
    return closest_status, closest_rect
def get_search_region(self, range_config: Union[List[float], dict], base_region: Tuple[int, int, int, int]) -> Tuple[int, int, int, int]:
    """Derive a search region from a range configuration.

    Args:
        range_config: Search range, in one of these forms:
            1. ``[x_percent, y_percent]`` - percentage of the base region;
               positive values are measured from the top-left corner,
               negative values from the bottom-right corner.
            2. ``{"center": [x_percent, y_percent, width_percent, height_percent]}``
               - offsets and size in percent, with the base region's centre
               as origin.
            3. ``{"rect": [x, y, width, height]}`` - pixel rectangle relative
               to the base region's top-left corner.
            Anything else, including a "center"/"rect" list that does not
            have exactly four items, selects the whole base region.
        base_region: Base region as (x, y, w, h).

    Returns:
        The resulting search region as (x, y, w, h). For the "center" and
        "rect" forms the origin is kept inside the base region and the size
        is cut off at its right/bottom edge (never below zero).
    """
    x, y, w, h = base_region

    def clamp_to_base(sx, sy, sw, sh):
        # Keep the origin inside the base region and trim the size so the
        # rectangle neither crosses the far edge nor becomes negative.
        sx = min(max(sx, x), x + w)
        sy = min(max(sy, y), y + h)
        sw = max(0, min(sw, x + w - sx))
        sh = max(0, min(sh, y + h - sy))
        return (sx, sy, sw, sh)

    if isinstance(range_config, list) and len(range_config) == 2:
        # Limit both percentages to [-100, 100].
        x_percent, y_percent = (min(max(p, -100), 100) for p in range_config)
        search_w = int(w * abs(x_percent) / 100)
        search_h = int(h * abs(y_percent) / 100)
        # Non-negative: anchored at the left/top edge; negative: anchored at
        # the right/bottom edge.
        search_x = x if x_percent >= 0 else x + w - search_w
        search_y = y if y_percent >= 0 else y + h - search_h
        return (search_x, search_y, search_w, search_h)

    if isinstance(range_config, dict) and "center" in range_config:
        center_config = range_config["center"]
        if len(center_config) == 4:
            center_x_percent, center_y_percent, width_percent, height_percent = center_config
            # Limit the size percentages to [0, 100].
            width_percent = min(max(width_percent, 0), 100)
            height_percent = min(max(height_percent, 0), 100)
            search_w = int(w * width_percent / 100)
            search_h = int(h * height_percent / 100)
            # Offsets are measured from the centre of the base region.
            center_x = x + w // 2 + int(w * center_x_percent / 100)
            center_y = y + h // 2 + int(h * center_y_percent / 100)
            return clamp_to_base(center_x - search_w // 2, center_y - search_h // 2,
                                 search_w, search_h)
    elif isinstance(range_config, dict) and "rect" in range_config:
        rect_config = range_config["rect"]
        if len(rect_config) == 4:
            rect_x, rect_y, rect_w, rect_h = rect_config
            return clamp_to_base(x + rect_x, y + rect_y, rect_w, rect_h)

    # Unknown or malformed configuration: use the whole base region.
    return (x, y, w, h)
def find_character_coordinates(self, region: Optional[Tuple[int, int, int, int]] = None) -> Optional[Tuple[int, int]]:
    """Read the character's coordinates from on-screen text.

    The text is expected to consist of a time label (Chinese characters
    ending in "时") followed by a coordinate pair. First every OCR entry is
    checked for label and coordinates in one piece; failing that, a label
    entry and a coordinate entry lying within 100 pixels of each other are
    combined.

    Args:
        region (Tuple[int, int, int, int], optional): Search area as
            (x, y, width, height).

    Returns:
        Optional[Tuple[int, int]]: The parsed (x, y) coordinates, or None
        when nothing was recognised, nothing matched, or an error occurred.
    """
    import time
    start_time = time.time()

    def squeezed_text(info):
        # OCR tends to insert spaces between glyphs; drop them before matching.
        return info.get('text', '').replace(' ', '')

    def bounding_box(info):
        # OCR entries carry their corners flattened under 'position'
        # ([x1, y1, x2, y2, ...]); reduce them to (x, y, w, h).
        position = info.get('position', [])
        if len(position) < 8:
            return (0, 0, 0, 0)
        xs = position[0::2]
        ys = position[1::2]
        return (min(xs), min(ys), max(xs) - min(xs), max(ys) - min(ys))

    try:
        if region:
            print(f"开始查找角色坐标,查找区域: {region}")
        else:
            print("开始查找角色坐标,使用默认全屏区域")
        # recognize_text returns None on failure, so normalise before len().
        text_info_list = self.recognize_text(region) or []
        print(f"识别到 {len(text_info_list)} 个文本区域")
        for i, text_info in enumerate(text_info_list):
            text = text_info.get('text', '')
            confidence = text_info.get('confidence', 0)
            print(f" [{i+1}] 文本: '{text}',置信度: {confidence:.2f},位置: {bounding_box(text_info)}")
        if not text_info_list:
            print("未识别到任何文本,无法查找角色坐标")
            return None

        # NOTE(review): the examples this code was written for include
        # unparenthesised forms such as "寅时123,123", but because ASCII
        # spaces are stripped before matching, these patterns effectively
        # need an opening "(" after the label and a closing ")" after the
        # numbers. Confirm the intended formats before loosening them.
        # Time label followed by a coordinate pair.
        coord_pattern = re.compile(r'[\u4e00-\u9fa5]+时[\s\(]+(\d+)(?:[\s,.]+)?(\d+)[\s\)]+', re.UNICODE)
        # Time label on its own, e.g. "寅时".
        time_pattern = re.compile(r'[\u4e00-\u9fa5]+时', re.UNICODE)
        # Coordinate pair with (groups 1/2) or without (groups 3/4) a label.
        coord_only_pattern = re.compile(r'[\u4e00-\u9fa5]+时[\s\(]+(\d+)(?:[\s,.]+)?(\d+)[\s\)]+|[\(]+(\d+)(?:[\s,.]+)?(\d+)[\s\)]+', re.UNICODE)

        # Pass 1: label and coordinates inside a single OCR entry.
        print("\n尝试匹配完整坐标格式...")
        for text_info in text_info_list:
            text = squeezed_text(text_info)
            confidence = text_info.get('confidence', 0)
            match = coord_pattern.search(text)
            if match:
                x_coord = int(match.group(1))
                y_coord = int(match.group(2))
                end_time = time.time()
                print(f"✓ 找到角色坐标: ({x_coord}, {y_coord})")
                print(f" 匹配文本: '{text}',置信度: {confidence:.2f}")
                print(f" 查找耗时: {end_time - start_time:.2f}")
                return (x_coord, y_coord)

        # Pass 2: label and coordinates split across separate entries.
        print("\n未找到完整坐标格式,尝试查找分离的时间和坐标...")
        time_texts = [info for info in text_info_list
                      if time_pattern.search(squeezed_text(info))]
        print(f"找到 {len(time_texts)} 个时间文本")
        for i, time_info in enumerate(time_texts):
            text = time_info.get('text', '')
            confidence = time_info.get('confidence', 0)
            print(f" 时间 [{i+1}]: '{text}',置信度: {confidence:.2f},位置: {bounding_box(time_info)}")

        coord_texts = [info for info in text_info_list
                       if coord_only_pattern.search(squeezed_text(info))]
        print(f"找到 {len(coord_texts)} 个坐标文本")
        for i, coord_info in enumerate(coord_texts):
            text = coord_info.get('text', '')
            confidence = coord_info.get('confidence', 0)
            print(f" 坐标 [{i+1}]: '{text}',置信度: {confidence:.2f},位置: {bounding_box(coord_info)}")

        print("\n检查时间和坐标是否在附近...")
        for time_info in time_texts:
            time_x, time_y, time_w, time_h = bounding_box(time_info)
            time_text = time_info.get('text', '')
            for coord_info in coord_texts:
                coord_x, coord_y, coord_w, coord_h = bounding_box(coord_info)
                coord_text = coord_info.get('text', '')
                # Distance from the label's top-right corner to the
                # coordinate entry's top-left corner.
                distance = ((coord_x - (time_x + time_w)) ** 2 + (coord_y - time_y) ** 2) ** 0.5
                print(f" 时间 '{time_text}' 与坐标 '{coord_text}' 的距离: {distance:.1f} 像素")
                if distance >= 100:
                    continue
                # Match on the same space-stripped text the filter used.
                match = coord_only_pattern.search(squeezed_text(coord_info))
                if not match:
                    continue
                if match.group(1) and match.group(2):
                    x_coord = int(match.group(1))
                    y_coord = int(match.group(2))
                elif match.group(3) and match.group(4):
                    x_coord = int(match.group(3))
                    y_coord = int(match.group(4))
                else:
                    continue
                end_time = time.time()
                print(f"✓ 找到角色坐标 (分离文本): ({x_coord}, {y_coord})")
                print(f" 时间文本: '{time_text}'")
                print(f" 坐标文本: '{coord_text}'")
                print(f" 距离: {distance:.1f} 像素")
                print(f" 查找耗时: {end_time - start_time:.2f}")
                return (x_coord, y_coord)

        end_time = time.time()
        print(f"\n✗ 未找到角色坐标")
        print(f" 查找耗时: {end_time - start_time:.2f}")
        return None
    except Exception as e:
        # Best-effort lookup: report the failure in detail and return None.
        end_time = time.time()
        print(f"\n✗ 查找角色坐标时出错: {e}")
        print(f" 查找耗时: {end_time - start_time:.2f}")
        import traceback
        print(f" 错误详情: {traceback.format_exc()}")
        return None