74 lines
2.6 KiB
Python
74 lines
2.6 KiB
Python
import subprocess
|
|
import tempfile
|
|
import os
|
|
import sys
|
|
import uuid
|
|
import re
|
|
|
|
# Passed as the final command-line argument to saveAsImage.js by replace_images().
# NOTE(review): presumably a CSS selector for the element to capture — confirm
# against saveAsImage.js.
CAPTURE_AREA = '#captureArea'
|
|
|
|
def main(input_file):
    """Convert an AsciiDoc document to a DOCX file next to the input.

    The document is first rendered to a temporary HTML file with
    ``asciidoctor``; fragments delimited by image markers are then replaced
    by ``replace_images`` and the resulting HTML is converted to DOCX with
    ``pandoc``.

    Args:
        input_file: Path to the ``.adoc`` document. A falsy value prints the
            usage message and exits with status 1.

    Raises:
        SystemExit: If ``input_file`` is empty or ``None``.
        subprocess.CalledProcessError: If ``asciidoctor`` or ``pandoc`` exits
            with a non-zero status.
    """
    if not input_file:
        print("Usage: python3 script.py path/to/document.adoc")
        sys.exit(1)

    output_directory = os.path.dirname(input_file)
    output_basename = os.path.splitext(os.path.basename(input_file))[0]
    output_docx = os.path.join(output_directory, f"{output_basename}.docx")
    images_directory = "./generated_images"

    # Generated images must outlive the temporary directory because the
    # rewritten HTML references them when pandoc runs.
    os.makedirs(images_directory, exist_ok=True)

    with tempfile.TemporaryDirectory() as temp_dir:
        html_path = os.path.join(temp_dir, f"{output_basename}_temp.html")

        # Convert AsciiDoc to HTML first; images are handled afterwards.
        subprocess.run(
            ['asciidoctor', '-b', 'html', '-o', html_path, input_file],
            check=True,
        )

        # Read and write the intermediate HTML explicitly as UTF-8 instead of
        # the locale's default encoding, so non-ASCII text survives the round
        # trip on every platform.
        with open(html_path, 'r', encoding='utf-8') as file:
            content = file.read()

        # Replace image placeholders with actual image tags.
        modified_content = replace_images(content, images_directory)

        with open(html_path, 'w', encoding='utf-8') as file:
            file.write(modified_content)

        # Convert the final HTML to DOCX.
        subprocess.run(
            ['pandoc', '-f', 'html', '-t', 'docx', '-o', output_docx, html_path],
            check=True,
        )

    print(f"DOCX file created successfully: {output_docx}")
|
|
|
|
|
|
def replace_images(content, image_dir):
    """Replace marked HTML fragments in ``content`` with ``<img>`` tags.

    Every span between ``<!--image-->`` and ``<!--/image-->`` is wrapped in a
    minimal HTML document and handed to the ``saveAsImage.js`` Node.js script
    together with a freshly generated PNG path inside ``image_dir`` and
    ``CAPTURE_AREA``. On success the span is replaced by an ``<img>`` tag that
    points at that path.

    Image generation is best-effort: if the script cannot be launched or
    exits with a non-zero status, the failure is reported on stderr and the
    span is replaced by an empty string.

    Args:
        content: HTML text to process.
        image_dir: Directory in which generated PNG files are placed.

    Returns:
        The HTML text with every marked span substituted. Text without
        markers is returned unchanged.
    """
    def replacement(match):
        html_content = match.group(1).strip()
        image_id = str(uuid.uuid4())
        output_path = os.path.join(image_dir, f"{image_id}.png")

        # Prepare the HTML content by wrapping it in an HTML document.
        full_html = f"<html><body>{html_content}</body></html>"

        # Call the Node.js script to convert HTML to an image.
        command = ['node', 'saveAsImage.js', full_html, output_path, CAPTURE_AREA]
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except OSError as exc:
            # Launch failures (node not installed, argument list too long for
            # a very large fragment) get the same best-effort treatment as a
            # failing script instead of aborting the whole conversion.
            print("Failed to generate image:", exc, file=sys.stderr)
            return ""

        if result.returncode != 0:
            print("Failed to generate image:", result.stderr, file=sys.stderr)
            return ""  # Drop the fragment if image generation fails.

        # Return the image tag for HTML.
        return f'<img src="{output_path}" alt="Generated Image" style="width:75%;">'

    # re.DOTALL lets '.' span newlines so multi-line fragments match; the lazy
    # '.*?' stops at the first closing marker so several fragments in one
    # document are matched separately.
    return re.sub(r'<!--image-->(.*?)<!--/image-->', replacement, content, flags=re.DOTALL)
|
|
|
|
|
|
if __name__ == "__main__":
    # Pass an empty path when no argument is supplied so that main() prints
    # its usage message and exits with status 1, instead of this line failing
    # with IndexError before main() is ever reached.
    main(sys.argv[1] if len(sys.argv) > 1 else "")
|