"""Convert an AsciiDoc document to DOCX via an intermediate HTML file.

Pipeline: ``asciidoctor`` (AsciiDoc -> HTML), then ``replace_images`` hands
matched HTML fragments to the ``saveAsImage.js`` Node script, then ``pandoc``
(HTML -> DOCX).
"""
import subprocess
import tempfile
import os
import sys
import uuid
import re

# Passed as the third argument to saveAsImage.js.
CAPTURE_AREA = '#captureArea'

# DOTALL lets a fragment span several lines; the lazy '.*?' keeps separate
# fragments from being merged into one match.
# NOTE(review): as written this pattern has no delimiters, so it matches the
# empty string at every position of the input. The opening/closing markup
# around the group looks like it was lost -- confirm the intended delimiters.
IMAGE_BLOCK_PATTERN = re.compile(r'(.*?)', flags=re.DOTALL)


def main(input_file):
    """Convert the AsciiDoc file *input_file* to a DOCX file next to it.

    The DOCX is written to the directory of *input_file*, using the same base
    name with a ``.docx`` extension. Generated images are stored in
    ``./generated_images`` (relative to the current working directory).

    Args:
        input_file: Path to the ``.adoc`` source. A falsy value prints the
            usage message and exits with status 1.

    Raises:
        SystemExit: If *input_file* is empty or ``None``.
        subprocess.CalledProcessError: If ``asciidoctor`` or ``pandoc`` exits
            with a non-zero status.
    """
    if not input_file:
        print("Usage: python3 script.py path/to/document.adoc")
        sys.exit(1)

    output_directory = os.path.dirname(input_file)
    output_basename = os.path.splitext(os.path.basename(input_file))[0]
    output_docx = os.path.join(output_directory, f"{output_basename}.docx")
    images_directory = "./generated_images"

    # Ensure the images directory exists
    os.makedirs(images_directory, exist_ok=True)

    # The intermediate HTML lives in a temporary directory that is removed
    # automatically once the DOCX has been produced.
    with tempfile.TemporaryDirectory() as temp_dir:
        html_path = os.path.join(temp_dir, f"{output_basename}_temp.html")

        # Convert AsciiDoc to HTML first, handling images next
        subprocess.run(
            ['asciidoctor', '-b', 'html', '-o', html_path, input_file],
            check=True,
        )

        # Read and write explicitly as UTF-8 so the result does not depend on
        # the platform's locale encoding.
        with open(html_path, 'r', encoding='utf-8') as file:
            content = file.read()

        modified_content = replace_images(content, images_directory)

        with open(html_path, 'w', encoding='utf-8') as file:
            file.write(modified_content)

        # Convert the final HTML to DOCX
        subprocess.run(
            ['pandoc', '-f', 'html', '-t', 'docx', '-o', output_docx, html_path],
            check=True,
        )

    print(f"DOCX file created successfully: {output_docx}")


def replace_images(content, image_dir):
    """Replace every fragment matched by IMAGE_BLOCK_PATTERN in *content*.

    For each match, the captured text is passed to ``node saveAsImage.js``
    together with a fresh ``<uuid>.png`` path inside *image_dir* and
    ``CAPTURE_AREA``. *image_dir* is not created here.

    Args:
        content: HTML text to process.
        image_dir: Directory used to build the image output paths.

    Returns:
        *content* with each match replaced by the replacement text, or by an
        empty string when the Node script could not be run or failed.
    """
    def replacement(match):
        html_content = match.group(1).strip()
        image_id = str(uuid.uuid4())
        output_path = os.path.join(image_dir, f"{image_id}.png")

        # NOTE(review): the fragment is forwarded unchanged; if it was meant
        # to be wrapped in a full HTML document, that wrapper markup is
        # missing here -- TODO confirm.
        full_html = f"{html_content}"

        # Call the Node.js script to convert HTML to an image
        command = ['node', 'saveAsImage.js', full_html, output_path, CAPTURE_AREA]
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except OSError as exc:
            # A missing `node` binary or an over-long argument list is treated
            # like any other failed conversion instead of aborting the run.
            print("Failed to generate image:", exc)
            return ""

        if result.returncode != 0:
            print("Failed to generate image:", result.stderr)
            return ""  # Return empty if image generation fails

        # NOTE(review): this text does not reference output_path, so the
        # generated file is never linked from the HTML; presumably an image
        # tag pointing at output_path was intended -- confirm.
        return 'Generated Image'

    return IMAGE_BLOCK_PATTERN.sub(replacement, content)


if __name__ == "__main__":
    # Fall back to an empty path so main() prints the usage message instead
    # of the script dying with an IndexError when no argument is given.
    main(sys.argv[1] if len(sys.argv) > 1 else "")