# yidaima_tools/md_to_wechat.py

from __future__ import annotations

import re
import markdown
from markdown.extensions.codehilite import CodeHiliteExtension
from markdown.extensions.fenced_code import FencedCodeExtension
from premailer import transform


# Each pattern matches EITHER a complete <pre>…</pre> element (captured in
# group 1 so it can be emitted unchanged) OR a piece of layout whitespace that
# is safe to drop. Because the <pre> alternative consumes the whole element,
# whitespace inside code blocks is never touched.
_PRE_OR_BLANK_LINES = re.compile(
    r'(<pre\b.*?</pre>)|\n\s*\n+', re.DOTALL | re.IGNORECASE
)
_PRE_OR_INTER_TAG_GAP = re.compile(
    r'(<pre\b.*?</pre>)|(?<=>)\s*\n\s*(?=<)', re.DOTALL | re.IGNORECASE
)


def convert_markdown_to_wechat(md_content: str) -> tuple[str, str]:
    """
    Convert Markdown text into inline-styled HTML for WeChat articles.

    The Markdown is rendered to HTML, wrapped in a document carrying a
    stylesheet, the stylesheet is inlined onto the elements with premailer,
    ``<img>`` tags are post-processed, and layout whitespace between tags is
    removed. Whitespace inside ``<pre>`` blocks is preserved so code samples
    keep their line breaks, blank lines and indentation.

    Args:
        md_content: Markdown source text.

    Returns:
        A ``(title, html)`` tuple. For empty or whitespace-only input the
        result is ``("", "<p></p>")``.
    """
    if not md_content or not md_content.strip():
        return "", "<p></p>"

    # Pick the article title from the Markdown headings.
    title = extract_title(md_content)

    # 1. Base stylesheet. Restricted to simple properties (color, font-size,
    #    margin, padding, line-height) because it is inlined in step 4.
    custom_css = """
    .wechat-body {
        font-family: -apple-system-font, BlinkMacSystemFont, "Helvetica Neue", "PingFang SC", "Hiragino Sans GB", "Microsoft YaHei UI", "Microsoft YaHei", Arial, sans-serif;
        font-size: 16px;
        color: #353535;
        line-height: 1.75;
        padding: 10px;
    }
    h1 {
        font-size: 24px;
        color: #007aff;
        border-bottom: 2px solid #007aff;
        padding-bottom: 10px;
        margin-top: 30px;
        margin-bottom: 15px;
    }
    h2 {
        font-size: 20px;
        color: #007aff;
        margin-top: 25px;
        margin-bottom: 10px;
        border-left: 4px solid #007aff;
        padding-left: 10px;
    }
    h3 {
        font-size: 18px;
        color: #007aff;
        margin-top: 20px;
        margin-bottom: 10px;
    }
    h4 {
        font-size: 16px;
        font-weight: bold;
        color: #007aff;
        margin-top: 15px;
        margin-bottom: 8px;
    }
    h5 {
        font-size: 14px;
        font-weight: bold;
        color: #007aff;
        margin-top: 12px;
        margin-bottom: 6px;
    }
    p {
        margin: 15px 0;
        text-align: justify;
    }
    code {
        background-color: #f8f8f8;
        color: #ff502c;
        padding: 2px 4px;
        border-radius: 3px;
        font-family: Consolas, Monaco, 'Andale Mono', monospace;
        font-size: 14px;
    }
    pre {
        background-color: #282c34;
        color: #abb2bf;
        padding: 15px;
        border-radius: 5px;
        overflow-x: auto;
        line-height: 1.4;
        font-family: Consolas, Monaco, 'Andale Mono', monospace;
        font-size: 13px;
    }
    pre code {
        background-color: transparent;
        color: inherit;
        padding: 0;
        border-radius: 0;
        font-size: inherit;
    }
    ul, ol {
        padding-left: 30px;
        color: #555;
        margin: 15px 0;
    }
    li {
        margin: 8px 0;
    }
    blockquote {
        border-left: 4px solid #007aff;
        color: #666;
        padding-left: 15px;
        margin: 20px 0;
        background-color: #f8f9fa;
        font-style: italic;
    }
    img {
        max-width: 100%;
        border-radius: 4px;
        display: block;
        margin: 20px auto;
    }
    a {
        color: #007aff;
        text-decoration: none;
    }
    table {
        width: 100%;
        border-collapse: collapse;
        margin: 15px 0;
        font-size: 14px;
    }
    th, td {
        border: 1px solid #ddd;
        padding: 10px;
        text-align: left;
    }
    th {
        background-color: #f8f9fa;
        font-weight: bold;
    }
    hr {
        border: none;
        border-top: 1px solid #e0e0e0;
        margin: 30px 0;
    }
    """

    # 2. Render Markdown to HTML: fenced_code handles ``` blocks, codehilite
    #    adds syntax-highlighting markup, plus tables and newline-to-<br>.
    html_body = markdown.markdown(md_content, extensions=[
        FencedCodeExtension(),
        CodeHiliteExtension(css_class='highlight', linenums=False, guess_lang=False),
        'tables',
        'nl2br'
    ])

    # 3. Wrap the fragment in a full document so the stylesheet travels with it.
    full_html = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <style>{custom_css}</style>
</head>
<body>
    <div class="wechat-body">
        {html_body}
    </div>
</body>
</html>"""

    # 4. Key step: premailer copies the <style> rules onto each matching
    #    element as a style="…" attribute.
    inline_html = transform(full_html)

    # 5. Post-process <img> tags for WeChat compatibility.
    inline_html = process_images(inline_html)

    # 6. Strip layout whitespace: collapse blank-line runs and remove line
    #    breaks between adjacent tags. <pre> elements are passed through
    #    verbatim, otherwise code lines would be merged and de-indented.
    inline_html = _PRE_OR_BLANK_LINES.sub(
        lambda m: m.group(1) or '\n', inline_html
    )
    inline_html = _PRE_OR_INTER_TAG_GAP.sub(
        lambda m: m.group(1) or '', inline_html
    )

    return title, inline_html


# A fenced code block: an opening fence of three or more backticks or tildes
# at the start of a line, through the next line consisting of the same fence.
_FENCED_CODE_BLOCK = re.compile(
    r'^(`{3,}|~{3,}).*?^\1[ \t]*$',
    re.MULTILINE | re.DOTALL,
)


def extract_title(md_text: str) -> str:
    """
    Extract an article title from Markdown text.

    Headings are searched by level: the first level-1 heading wins, then the
    first level-2 heading, then the first level-3 heading. Lines inside fenced
    code blocks are ignored so that a ``# comment`` in a code sample is not
    mistaken for a heading.

    Args:
        md_text: Markdown source text.

    Returns:
        The stripped heading text, or the default title ``"公众号文章"`` when
        no level 1-3 heading is found.
    """
    # Drop fenced code first; "# ..." lines in there are code, not headings.
    searchable = _FENCED_CODE_BLOCK.sub('', md_text)

    for level in (1, 2, 3):
        heading = re.search(
            r'^' + '#' * level + r' (.+)$', searchable, re.MULTILINE
        )
        if heading:
            return heading.group(1).strip()

    # Fallback title when the document has no usable heading.
    return "公众号文章"


# Attributes appended to every <img> tag (forced inline style + data-type).
_IMG_FORCED_ATTRS = (
    ' style="display: block; margin: 20px auto; width: 100% !important; '
    'height: auto !important; visibility: visible !important;" data-type="png"'
)
# src / data-src attribute whose URL uses the plain-http scheme. The lookbehind
# keeps "src" from matching in the middle of another attribute name.
_INSECURE_IMG_URL = re.compile(r'(?<![\w-])(data-src|src)="http://')
# The real src attribute (not the tail of data-src).
_IMG_SRC_ATTR = re.compile(r'(?<![\w-])src="([^"]+)"')


def process_images(html: str) -> str:
    """
    Rewrite every ``<img>`` tag in *html* for WeChat compatibility.

    For each tag:
      * an ``http://`` scheme on ``src`` / ``data-src`` is upgraded to
        ``https://`` (only the scheme; the rest of the URL is untouched);
      * a ``data-src`` attribute mirroring ``src`` is added when missing;
      * any existing ``style`` and ``class`` attributes are removed;
      * a fixed inline style and ``data-type="png"`` are appended.

    Both ``<img ...>`` and self-closing ``<img ... />`` forms are supported;
    the closing form of the input tag is kept.

    Args:
        html: HTML text that may contain ``<img>`` tags with double-quoted
            attribute values.

    Returns:
        The HTML with all ``<img>`` tags rewritten; other markup is unchanged.
    """
    def fix_img_tag(match):
        img_tag = match.group(0)

        # Separate the tag terminator so new attributes are always inserted
        # before it, even for the self-closing "/>" form.
        closing = ' />' if img_tag.endswith('/>') else '>'
        body = img_tag[:-2] if img_tag.endswith('/>') else img_tag[:-1]
        body = body.rstrip()

        # 1. Force https, touching only the URL scheme.
        body = _INSECURE_IMG_URL.sub(r'\1="https://', body)

        # 2. Add data-src mirroring src when the tag has none.
        src_match = _IMG_SRC_ATTR.search(body)
        if src_match and 'data-src=' not in body:
            insert_at = src_match.end()
            body = (
                body[:insert_at]
                + f' data-src="{src_match.group(1)}"'
                + body[insert_at:]
            )

        # 3. Drop existing style/class so only the forced inline style applies.
        body = re.sub(r'\s*style="[^"]*"', '', body)
        body = re.sub(r'\s*class="[^"]*"', '', body)

        # 4. Append the forced inline style and restore the terminator.
        return body + _IMG_FORCED_ATTRS + closing

    # Apply the rewrite to every <img ...> tag.
    return re.sub(r'<img[^>]+>', fix_img_tag, html)


def main():
    """Convert a built-in sample document, print a preview and save the HTML."""
    sample_markdown = """
# 这是一个标题

这是一段正文，包含 **粗体** 和 *斜体* 文字。

## 二级标题

> 这是一个引言块，用于强调重要内容。

### 列表示例

- 第一项
- 第二项
- 第三项

#### 四级标题

### 代码示例

```python
print("Hello WeChat!")
```

行内代码：`print("hello")`

### 链接和图片

[访问百度](https://www.baidu.com)

---

**注意**：以上内容仅供测试使用。
"""

    title, html = convert_markdown_to_wechat(sample_markdown)

    # Show at most the first 800 characters, marking truncation with "...".
    preview_limit = 800
    if len(html) > preview_limit:
        preview = html[:preview_limit] + "..."
    else:
        preview = html

    print(f"标题: {title}")
    print(f"HTML 长度: {len(html)} 字符")
    print("\nHTML 预览:")
    print(preview)

    # Write the complete HTML next to the current working directory.
    output_path = "output_wechat.html"
    with open(output_path, "w", encoding="utf-8") as handle:
        handle.write(html)
    print(f"\n已保存到: {output_path}")


# Run the sample conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()