init
This commit is contained in:
292
md_to_wechat.py
Normal file
292
md_to_wechat.py
Normal file
@@ -0,0 +1,292 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import markdown
|
||||
from markdown.extensions.codehilite import CodeHiliteExtension
|
||||
from markdown.extensions.fenced_code import FencedCodeExtension
|
||||
from premailer import transform
|
||||
|
||||
|
||||
# Base stylesheet for the article. premailer copies these rules onto the
# matching elements as inline ``style`` attributes. Original author's note:
# WeChat copes well with color, font-size, margin, padding and line-height.
_WECHAT_CSS = """
.wechat-body {
    font-family: -apple-system-font, BlinkMacSystemFont, "Helvetica Neue", "PingFang SC", "Hiragino Sans GB", "Microsoft YaHei UI", "Microsoft YaHei", Arial, sans-serif;
    font-size: 16px;
    color: #353535;
    line-height: 1.75;
    padding: 10px;
}
h1 {
    font-size: 24px;
    color: #007aff;
    border-bottom: 2px solid #007aff;
    padding-bottom: 10px;
    margin-top: 30px;
    margin-bottom: 15px;
}
h2 {
    font-size: 20px;
    color: #007aff;
    margin-top: 25px;
    margin-bottom: 10px;
    border-left: 4px solid #007aff;
    padding-left: 10px;
}
h3 {
    font-size: 18px;
    color: #007aff;
    margin-top: 20px;
    margin-bottom: 10px;
}
h4 {
    font-size: 16px;
    font-weight: bold;
    color: #007aff;
    margin-top: 15px;
    margin-bottom: 8px;
}
h5 {
    font-size: 14px;
    font-weight: bold;
    color: #007aff;
    margin-top: 12px;
    margin-bottom: 6px;
}
p {
    margin: 15px 0;
    text-align: justify;
}
code {
    background-color: #f8f8f8;
    color: #ff502c;
    padding: 2px 4px;
    border-radius: 3px;
    font-family: Consolas, Monaco, 'Andale Mono', monospace;
    font-size: 14px;
}
pre {
    background-color: #282c34;
    color: #abb2bf;
    padding: 15px;
    border-radius: 5px;
    overflow-x: auto;
    line-height: 1.4;
    font-family: Consolas, Monaco, 'Andale Mono', monospace;
    font-size: 13px;
}
pre code {
    background-color: transparent;
    color: inherit;
    padding: 0;
    border-radius: 0;
    font-size: inherit;
}
ul, ol {
    padding-left: 30px;
    color: #555;
    margin: 15px 0;
}
li {
    margin: 8px 0;
}
blockquote {
    border-left: 4px solid #007aff;
    color: #666;
    padding-left: 15px;
    margin: 20px 0;
    background-color: #f8f9fa;
    font-style: italic;
}
img {
    max-width: 100%;
    border-radius: 4px;
    display: block;
    margin: 20px auto;
}
a {
    color: #007aff;
    text-decoration: none;
}
table {
    width: 100%;
    border-collapse: collapse;
    margin: 15px 0;
    font-size: 14px;
}
th, td {
    border: 1px solid #ddd;
    padding: 10px;
    text-align: left;
}
th {
    background-color: #f8f9fa;
    font-weight: bold;
}
hr {
    border: none;
    border-top: 1px solid #e0e0e0;
    margin: 30px 0;
}
"""


def _collapse_whitespace_outside_pre(html: str) -> str:
    """Collapse blank lines and inter-tag newlines, leaving ``<pre>`` intact."""
    stashed: list[str] = []

    def _stash(match):
        # Swap the block for a tag-shaped NUL-delimited token so that the
        # whitespace around it is still collapsed like around any other tag.
        stashed.append(match.group(0))
        return f"<\x00{len(stashed) - 1}\x00>"

    compact = re.sub(
        r'<pre\b.*?</pre>', _stash, html, flags=re.DOTALL | re.IGNORECASE
    )
    compact = re.sub(r'\n\s*\n+', '\n', compact)
    compact = re.sub(r'>\s*\n\s*<', '><', compact)
    return re.sub(
        r'<\x00(\d+)\x00>', lambda m: stashed[int(m.group(1))], compact
    )


def convert_markdown_to_wechat(md_content: str) -> tuple[str, str]:
    """
    Convert Markdown text into HTML styled for a WeChat official-account post.

    The Markdown is rendered with fenced-code, codehilite, tables and nl2br
    support, wrapped in a ``.wechat-body`` container, and passed through
    premailer so the stylesheet ends up as inline ``style`` attributes.
    ``<img>`` tags are then post-processed and redundant whitespace removed.

    Args:
        md_content: The Markdown source text.

    Returns:
        A ``(title, html)`` tuple. For empty or whitespace-only input this is
        ``("", "<p></p>")``.
    """
    if not md_content or not md_content.strip():
        return "", "<p></p>"

    title = extract_title(md_content)

    # 1. Markdown -> HTML. fenced_code handles code fences, codehilite the
    #    syntax highlighting.
    html_body = markdown.markdown(md_content, extensions=[
        FencedCodeExtension(),
        CodeHiliteExtension(css_class='highlight', linenums=False, guess_lang=False),
        'tables',
        'nl2br'
    ])

    # 2. Wrap the fragment in a full document carrying the stylesheet.
    full_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>{_WECHAT_CSS}</style>
</head>
<body>
<div class="wechat-body">
{html_body}
</div>
</body>
</html>"""

    # 3. Key step: premailer resolves the <style> selectors and writes the
    #    declarations onto each element, e.g. <p style="margin: 15px 0; ...">.
    inline_html = transform(full_html)

    # 4. Adjust <img> tags for WeChat compatibility.
    inline_html = process_images(inline_html)

    # 5. Remove redundant blank lines / line breaks. <pre> blocks are skipped:
    #    collapsing whitespace inside them would merge the lines of code
    #    samples and drop their indentation.
    inline_html = _collapse_whitespace_outside_pre(inline_html)

    return title, inline_html
|
||||
|
||||
|
||||
def extract_title(md_text: str) -> str:
    """Extract an article title from Markdown text.

    Returns the text of the first level-1 heading if there is one, otherwise
    the first level-2 heading, otherwise the first level-3 heading. Only
    ATX headings written as hashes followed by a single space at the start of
    a line are recognised. Lines inside closed fenced code blocks (``` or
    ~~~) are ignored, so a ``# comment`` in a code sample is never mistaken
    for a heading.

    Args:
        md_text: The Markdown source text.

    Returns:
        The stripped heading text, or the default title "公众号文章" when no
        level 1-3 heading is found.
    """
    # Blank out fenced code blocks that have a matching closing fence; an
    # unclosed fence is left alone and treated as ordinary text.
    prose = re.sub(
        r'^(?P<fence>`{3,}|~{3,})[^\n]*\n.*?^(?P=fence)[ \t]*\r?$',
        '',
        md_text,
        flags=re.MULTILINE | re.DOTALL,
    )

    # Prefer the shallowest heading level present anywhere in the document.
    for marker in ('#', '##', '###'):
        heading = re.search(rf'^{marker} (.+)$', prose, re.MULTILINE)
        if heading:
            return heading.group(1).strip()

    # Default title.
    return "公众号文章"
|
||||
|
||||
|
||||
# Inline style forced onto every processed <img> tag.
_IMG_INLINE_STYLE = (
    "display: block; margin: 20px auto; width: 100% !important; "
    "height: auto !important; visibility: visible !important;"
)
# A real ``src`` attribute (the lookbehind rejects the tail of ``data-src``).
_IMG_SRC_RE = re.compile(r'(?<![\w-])src="([^"]+)"')
# Either ``src`` or ``data-src`` with a non-empty value.
_IMG_ANY_SRC_RE = re.compile(r'(?<![\w-])(?:data-)?src="[^"]+"')
# The ``http://`` scheme at the very start of a ``src`` / ``data-src`` value.
_IMG_HTTP_SRC_RE = re.compile(r'(?<![\w-])((?:data-)?src=")http://')


def process_images(html: str) -> str:
    """Rewrite every ``<img>`` tag in *html* for WeChat compatibility.

    For each tag that carries a double-quoted ``src`` (or ``data-src``):

    * an ``http://`` scheme at the start of the URL is upgraded to
      ``https://`` (URLs embedded later in the value are left untouched);
    * a ``data-src`` attribute mirroring ``src`` is added if none exists;
    * any existing ``style`` and ``class`` attributes are dropped and a fixed
      inline style plus ``data-type="png"`` are appended.

    Tags without such an attribute are returned unchanged.

    Args:
        html: HTML text to process.

    Returns:
        The HTML with all matching ``<img>`` tags rewritten.
    """
    def fix_img_tag(match):
        img_tag = match.group(0)

        if not _IMG_ANY_SRC_RE.search(img_tag):
            return img_tag

        # 1. Force https, touching only the scheme prefix of the attribute.
        img_tag = _IMG_HTTP_SRC_RE.sub(r'\1https://', img_tag)

        # 2. Add data-src next to src when the tag does not have one yet.
        if 'data-src=' not in img_tag:
            img_tag = _IMG_SRC_RE.sub(
                lambda m: f'{m.group(0)} data-src="{m.group(1)}"',
                img_tag,
                count=1,
            )

        # 3. Drop existing style/class so the forced inline style is the only
        #    styling left on the tag.
        img_tag = re.sub(r'\s*style="[^"]*"', '', img_tag)
        img_tag = re.sub(r'\s*class="[^"]*"', '', img_tag)

        # 4. Append the forced attributes before the closing bracket, keeping
        #    a self-closing slash (if any) at the very end of the tag.
        head = img_tag[:-1]
        tail = '>'
        trimmed = head.rstrip()
        if trimmed.endswith('/'):
            head = trimmed[:-1].rstrip()
            tail = ' />'
        return f'{head} style="{_IMG_INLINE_STYLE}" data-type="png"{tail}'

    # Match every img tag.
    return re.sub(r'<img[^>]+>', fix_img_tag, html)
|
||||
|
||||
|
||||
def main():
    """Run a sample conversion, print a summary and save the HTML to disk."""
    sample_markdown = """
# 这是一个标题

这是一段正文,包含 **粗体** 和 *斜体* 文字。

## 二级标题

> 这是一个引言块,用于强调重要内容。

### 列表示例

- 第一项
- 第二项
- 第三项

#### 四级标题

### 代码示例

```python
print("Hello WeChat!")
```

行内代码:`print("hello")`

### 链接和图片

[访问百度](https://www.baidu.com)

---

**注意**:以上内容仅供测试使用。
"""

    article_title, article_html = convert_markdown_to_wechat(sample_markdown)

    # Show at most the first 800 characters of the generated HTML.
    if len(article_html) > 800:
        preview = article_html[:800] + "..."
    else:
        preview = article_html

    print(f"标题: {article_title}")
    print(f"HTML 长度: {len(article_html)} 字符")
    print("\nHTML 预览:")
    print(preview)

    # Save the full result to a file.
    destination = "output_wechat.html"
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(article_html)
    print(f"\n已保存到: {destination}")
|
||||
|
||||
|
||||
# Run the sample conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user