-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHTML2PDF.py
More file actions
113 lines (92 loc) · 4.3 KB
/
HTML2PDF.py
File metadata and controls
113 lines (92 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import argparse
import asyncio
import os
from pathlib import Path
from playwright.async_api import async_playwright
async def convert_single(input_source: str, output_path: str, is_url: bool) -> None:
    """Convert a single local HTML file or an online URL to a PDF.

    Launches headless Chromium through Playwright, navigates to the source,
    waits until the network is idle, and prints the page as an A4 PDF with
    1 cm margins and background graphics enabled.

    Args:
        input_source: The URL to load when ``is_url`` is true; otherwise the
            path of a local HTML file (relative paths are made absolute
            against the current working directory).
        output_path: Destination path of the generated PDF.
        is_url: ``True`` if ``input_source`` is a URL, ``False`` if it is a
            local file path.

    Errors raised while loading or printing the page are caught and reported
    on stdout rather than propagated to the caller.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            # Created inside the ``try`` so the browser is closed even when
            # opening the page itself fails.
            page = await browser.new_page()

            if is_url:
                target = input_source
                print(f"[*] Loading URL: {input_source}")
            else:
                abs_path = os.path.abspath(input_source)
                # as_uri() percent-encodes characters such as '#', '?' and
                # spaces and emits the correct ``file:///C:/...`` form on
                # Windows; plain string concatenation does neither.
                target = Path(abs_path).as_uri()
                print(f"[*] Loading local file: {abs_path}")

            await page.goto(target, wait_until="networkidle")
            await page.pdf(
                path=output_path,
                format="A4",
                print_background=True,
                margin={"top": "1cm", "right": "1cm", "bottom": "1cm", "left": "1cm"},
            )
            print(f"[+] Successfully saved PDF to: {output_path}")
        except Exception as e:
            # Best-effort CLI behavior: report the failure and return.
            print(f"[-] Error during conversion: {e}")
        finally:
            await browser.close()
def _unique_pdf_name(relative_html: Path, taken: set) -> str:
    """Return a PDF file name for *relative_html* not yet recorded in *taken*.

    The plain ``<stem>.pdf`` name is used whenever it is free. On a collision
    the name is derived from the flattened relative path (including the
    original extension), with a numeric suffix as a last resort. Names are
    tracked case-insensitively so they stay distinct on case-insensitive
    file systems. The chosen name is added to *taken*.
    """
    candidate = f"{relative_html.stem}.pdf"
    if candidate.casefold() in taken:
        flat = "_".join(relative_html.parts)
        candidate = f"{flat}.pdf"
        serial = 2
        while candidate.casefold() in taken:
            candidate = f"{flat}_{serial}.pdf"
            serial += 1
    taken.add(candidate.casefold())
    return candidate


async def convert_batch(input_dir: str, output_dir: str) -> None:
    """Convert every HTML file under a directory tree to PDF.

    Searches ``input_dir`` recursively for ``*.html`` and ``*.htm`` files and
    writes one A4 PDF per file (1 cm margins, backgrounds enabled) directly
    into ``output_dir``, which is created if necessary. A single headless
    Chromium instance is reused for all files.

    Output files are named ``<stem>.pdf``. When two inputs would produce the
    same name (for example ``a/index.html`` and ``b/index.html``), later ones
    get a name derived from their path relative to ``input_dir`` so that no
    PDF is silently overwritten. Files are processed shallowest-first, so
    top-level files keep the plain name.

    Args:
        input_dir: Directory to search for HTML files.
        output_dir: Directory that receives the generated PDFs.

    Per-file failures are reported on stdout and counted; they do not abort
    the rest of the batch.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)

    if not input_path.is_dir():
        print(f"[-] Input directory not found: '{input_dir}'")
        return

    output_path.mkdir(parents=True, exist_ok=True)

    # Skip directories that merely look like HTML files, and sort so that the
    # processing order (and therefore collision handling) is deterministic.
    html_files = sorted(
        (
            candidate
            for pattern in ("*.html", "*.htm")
            for candidate in input_path.rglob(pattern)
            if candidate.is_file()
        ),
        key=lambda candidate: (len(candidate.parts), candidate),
    )

    if not html_files:
        print(f"[!] No HTML files found in '{input_dir}'.")
        return

    print(f"[*] Found {len(html_files)} HTML files. Starting batch conversion...\n")

    success_count = 0
    error_count = 0
    taken_names = set()

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            for html_file in html_files:
                # as_uri() yields a correctly escaped, platform-correct
                # file:// URL (spaces, '#', '?', Windows drive letters).
                file_url = html_file.resolve().as_uri()
                pdf_filename = _unique_pdf_name(
                    html_file.relative_to(input_path), taken_names
                )
                abs_output_path = output_path / pdf_filename

                print(f"[*] Processing: {html_file.name}")

                page = await browser.new_page()
                try:
                    await page.goto(file_url, wait_until="networkidle")
                    await page.pdf(
                        path=str(abs_output_path),
                        format="A4",
                        print_background=True,
                        margin={"top": "1cm", "right": "1cm", "bottom": "1cm", "left": "1cm"},
                    )
                    print(f"  [+] Success -> {pdf_filename}")
                    success_count += 1
                except Exception as e:
                    print(f"  [-] Failed: {e}")
                    error_count += 1
                finally:
                    await page.close()
        finally:
            # Always release the browser, even if the loop aborts unexpectedly.
            await browser.close()

    print(f"\n[=] Batch completed! Success: {success_count}, Failed: {error_count}.")
    print(f"[=] Files saved to: {output_path.resolve()}")
def main():
    """Parse command-line arguments and run the requested conversion.

    Exactly one of ``-u/--url``, ``-f/--file`` or ``-b/--batch`` must be
    supplied, together with ``-o/--output`` (a PDF path for ``-u``/``-f``, a
    directory for ``-b``).

    Raises:
        SystemExit: With status 2 when the arguments are invalid (including
            an empty input value), or with status 1 when the ``-f`` input is
            not an existing file.
    """
    parser = argparse.ArgumentParser(description="HTML2PDF: A robust HTML/URL to PDF converter using Playwright.")

    # Mutually exclusive group: the user must pick exactly one of URL,
    # single-file, or batch mode.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-u", "--url", help="Convert a single online URL to PDF.")
    group.add_argument("-f", "--file", help="Convert a single local HTML file to PDF.")
    group.add_argument("-b", "--batch", help="Batch convert all HTML files in a directory.")

    parser.add_argument("-o", "--output", required=True, help="Output PDF file path (for -u/-f) or Output directory (for -b).")

    args = parser.parse_args()

    # argparse accepts an empty string (e.g. ``-u ""``); without this check
    # every branch below would be skipped and the tool would silently do
    # nothing while reporting success.
    if not (args.url or args.file or args.batch):
        parser.error("the input given to -u/-f/-b must not be empty")

    if args.url:
        asyncio.run(convert_single(args.url, args.output, is_url=True))
    elif args.file:
        # isfile() also rejects directories, which cannot be converted as a
        # single HTML document.
        if not os.path.isfile(args.file):
            print(f"[-] Local file not found: {args.file}")
            # Signal the failure to the calling shell or script.
            raise SystemExit(1)
        asyncio.run(convert_single(args.file, args.output, is_url=False))
    elif args.batch:
        asyncio.run(convert_batch(args.batch, args.output))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()