Files
yidaima_tools/md_to_wechat.py
王鹏 a2f5875d1b init
2026-04-09 14:55:54 +08:00

293 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import re
import markdown
from markdown.extensions.codehilite import CodeHiliteExtension
from markdown.extensions.fenced_code import FencedCodeExtension
from premailer import transform
# Base stylesheet for the generated article. It is embedded in a <style> tag
# and then inlined onto each element by premailer. Per the original author's
# note, WeChat handles color, font-size, margin, padding and line-height well.
_WECHAT_CSS = """
.wechat-body {
font-family: -apple-system-font, BlinkMacSystemFont, "Helvetica Neue", "PingFang SC", "Hiragino Sans GB", "Microsoft YaHei UI", "Microsoft YaHei", Arial, sans-serif;
font-size: 16px;
color: #353535;
line-height: 1.75;
padding: 10px;
}
h1 {
font-size: 24px;
color: #007aff;
border-bottom: 2px solid #007aff;
padding-bottom: 10px;
margin-top: 30px;
margin-bottom: 15px;
}
h2 {
font-size: 20px;
color: #007aff;
margin-top: 25px;
margin-bottom: 10px;
border-left: 4px solid #007aff;
padding-left: 10px;
}
h3 {
font-size: 18px;
color: #007aff;
margin-top: 20px;
margin-bottom: 10px;
}
h4 {
font-size: 16px;
font-weight: bold;
color: #007aff;
margin-top: 15px;
margin-bottom: 8px;
}
h5 {
font-size: 14px;
font-weight: bold;
color: #007aff;
margin-top: 12px;
margin-bottom: 6px;
}
p {
margin: 15px 0;
text-align: justify;
}
code {
background-color: #f8f8f8;
color: #ff502c;
padding: 2px 4px;
border-radius: 3px;
font-family: Consolas, Monaco, 'Andale Mono', monospace;
font-size: 14px;
}
pre {
background-color: #282c34;
color: #abb2bf;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
line-height: 1.4;
font-family: Consolas, Monaco, 'Andale Mono', monospace;
font-size: 13px;
}
pre code {
background-color: transparent;
color: inherit;
padding: 0;
border-radius: 0;
font-size: inherit;
}
ul, ol {
padding-left: 30px;
color: #555;
margin: 15px 0;
}
li {
margin: 8px 0;
}
blockquote {
border-left: 4px solid #007aff;
color: #666;
padding-left: 15px;
margin: 20px 0;
background-color: #f8f9fa;
font-style: italic;
}
img {
max-width: 100%;
border-radius: 4px;
display: block;
margin: 20px auto;
}
a {
color: #007aff;
text-decoration: none;
}
table {
width: 100%;
border-collapse: collapse;
margin: 15px 0;
font-size: 14px;
}
th, td {
border: 1px solid #ddd;
padding: 10px;
text-align: left;
}
th {
background-color: #f8f9fa;
font-weight: bold;
}
hr {
border: none;
border-top: 1px solid #e0e0e0;
margin: 30px 0;
}
"""


def _collapse_whitespace_outside_pre(html):
    """Drop blank lines and inter-tag newlines everywhere except inside <pre>."""
    stashed = []

    def stash(match):
        stashed.append(match.group(0))
        return f'<pre data-wechat-stash="{len(stashed) - 1}"></pre>'

    # Newlines inside <pre> are significant (they are the code's line breaks),
    # so park every <pre> block behind a placeholder tag while compacting.
    compact = re.sub(r'<pre\b.*?</pre>', stash, html, flags=re.DOTALL | re.IGNORECASE)
    compact = re.sub(r'\n\s*\n+', '\n', compact)
    compact = re.sub(r'>\s*\n\s*<', '><', compact)
    # A callable replacement keeps backslashes in the stashed code untouched.
    return re.sub(
        r'<pre data-wechat-stash="(\d+)"></pre>',
        lambda match: stashed[int(match.group(1))],
        compact,
    )


def convert_markdown_to_wechat(md_content: str) -> tuple[str, str]:
    """Convert Markdown text into HTML for a WeChat Official Account article.

    The Markdown is rendered with the fenced_code, codehilite, tables and
    nl2br extensions, wrapped in a ``wechat-body`` container together with the
    module stylesheet, passed through premailer so the CSS rules become inline
    ``style`` attributes, post-processed by ``process_images`` and finally
    compacted. Whitespace inside ``<pre>`` blocks is left untouched so code
    samples keep their line breaks and blank lines.

    Args:
        md_content: Markdown source text.

    Returns:
        A ``(title, html)`` tuple. For empty or whitespace-only input the
        result is ``("", "<p></p>")``.
    """
    if not md_content or not md_content.strip():
        return "", "<p></p>"

    title = extract_title(md_content)

    # fenced_code handles ``` blocks; codehilite adds highlighting markup.
    html_body = markdown.markdown(md_content, extensions=[
        FencedCodeExtension(),
        CodeHiliteExtension(css_class='highlight', linenums=False, guess_lang=False),
        'tables',
        'nl2br',
    ])

    full_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>{_WECHAT_CSS}</style>
</head>
<body>
<div class="wechat-body">
{html_body}
</div>
</body>
</html>"""

    # Key step: premailer reads the <style> rules and writes them onto the
    # matching elements as inline style attributes.
    inline_html = transform(full_html)
    inline_html = process_images(inline_html)
    inline_html = _collapse_whitespace_outside_pre(inline_html)
    return title, inline_html
def extract_title(md_text: str) -> str:
    """Return the article title taken from the Markdown headings.

    The first level-1 heading (``# ...``) anywhere in the text wins; if there
    is none, the first level-2 heading is used, then the first level-3
    heading. Lines inside closed fenced code blocks are ignored, and headings
    that are empty after stripping whitespace are skipped.

    Args:
        md_text: Markdown source text.

    Returns:
        The stripped heading text, or the default title "公众号文章" when no
        usable heading exists.
    """
    # A "# comment" line inside a fenced code block is code, not a heading,
    # so remove closed ``` / ~~~ fences before searching.
    searchable = re.sub(
        r'^(`{3,}|~{3,})[^\n`]*\n.*?^\1[ \t]*$',
        '',
        md_text,
        flags=re.MULTILINE | re.DOTALL,
    )

    for level in (1, 2, 3):
        heading = re.compile(rf'^{"#" * level} (.+)$', re.MULTILINE)
        for match in heading.finditer(searchable):
            title = match.group(1).strip()
            if title:
                return title

    # Default title
    return "公众号文章"
def process_images(html: str) -> str:
    """Rewrite every ``<img>`` tag in *html* for better WeChat compatibility.

    For each tag:
      1. ``http://`` at the start of the ``src`` / ``data-src`` value is
         upgraded to ``https://`` (only the scheme, never text later in the
         URL).
      2. A ``data-src`` attribute mirroring ``src`` is added when the tag has
         none.
      3. Any existing ``style`` and ``class`` attributes are removed and a
         fixed inline style is appended so the image does not collapse.
      4. ``data-type="png"`` is appended unless a ``data-type`` is present.

    Self-closing tags (``<img ... />``) stay self-closing; the new attributes
    are inserted before the closing ``/>``.

    Args:
        html: HTML text that may contain ``<img>`` tags.

    Returns:
        The HTML with every ``<img>`` tag rewritten; other markup is unchanged.
    """
    forced_style = (
        'display: block; margin: 20px auto; width: 100% !important; '
        'height: auto !important; visibility: visible !important;'
    )

    def fix_img_tag(match):
        img_tag = match.group(0)

        # The lookbehind stops "src" from matching inside another attribute
        # name; "data-src" is handled explicitly via the optional prefix.
        img_tag = re.sub(
            r'(?<![\w-])((?:data-)?src=")http://',
            r'\1https://',
            img_tag,
        )

        if 'data-src=' not in img_tag:
            img_tag = re.sub(
                r'(?<![\w-])src="([^"]+)"',
                r'src="\1" data-src="\1"',
                img_tag,
                count=1,
            )

        # Drop attributes that would conflict with the forced inline style.
        img_tag = re.sub(r'\s*style="[^"]*"', '', img_tag)
        img_tag = re.sub(r'\s*class="[^"]*"', '', img_tag)

        # Insert the new attributes before the tag terminator, so "/>" never
        # ends up stranded in the middle of the tag.
        self_closing = img_tag.endswith('/>')
        head = img_tag[:-2 if self_closing else -1].rstrip()
        extra = f' style="{forced_style}"'
        if 'data-type=' not in head:
            extra += ' data-type="png"'
        return head + extra + (' />' if self_closing else '>')

    # Match all img tags
    return re.sub(r'<img[^>]+>', fix_img_tag, html)
def main():
    """Run a manual smoke test of the Markdown-to-WeChat conversion.

    Converts a built-in sample document, prints the extracted title, the HTML
    length and a preview of at most 800 characters, then writes the full HTML
    to ``output_wechat.html`` in the current working directory.
    """
    sample_markdown = """
# 这是一个标题
这是一段正文,包含 **粗体** 和 *斜体* 文字。
## 二级标题
> 这是一个引言块,用于强调重要内容。
### 列表示例
- 第一项
- 第二项
- 第三项
#### 四级标题
### 代码示例
```python
print("Hello WeChat!")
```
行内代码:`print("hello")`
### 链接和图片
[访问百度](https://www.baidu.com)
---
**注意**:以上内容仅供测试使用。
"""
    article_title, article_html = convert_markdown_to_wechat(sample_markdown)

    # Truncate long output so the console preview stays readable.
    if len(article_html) > 800:
        preview = article_html[:800] + "..."
    else:
        preview = article_html

    print(f"标题: {article_title}")
    print(f"HTML 长度: {len(article_html)} 字符")
    print("\nHTML 预览:")
    print(preview)

    # Save the complete HTML to a file.
    output_file = "output_wechat.html"
    with open(output_file, "w", encoding="utf-8") as handle:
        handle.write(article_html)
    print(f"\n已保存到: {output_file}")


if __name__ == "__main__":
    main()