+ """
+
+ # Combine all content
+ final_content = '\n'.join(html_all_pages_content)
+ html_all_content = f"{html_header}{toc_html}{final_content}"
+
+ return html_all_content
+
+ except Exception as e:
+ raise DocumentationProcessingError(f"Error processing documentation: {e}")
+
+
+def generate_pdf(html_content, output_pdf, format_options=None):
+    """Render an HTML string to a PDF file using Playwright's Chromium.
+
+    Args:
+        html_content: Complete HTML document to render.
+        output_pdf: Path of the PDF file to write.
+        format_options: Optional dict of keyword arguments passed to
+            ``page.pdf``. When omitted or empty, the A4 defaults defined
+            below are used; when given, it replaces the defaults entirely
+            (the two dicts are not merged).
+
+    Raises:
+        DocumentationProcessingError: If launching the browser, loading the
+            content, or writing the PDF fails. The underlying exception is
+            chained as ``__cause__``.
+    """
+    # NOTE(review): the markup of both templates was missing from the
+    # literals (only the text "of" survived), which left the strings
+    # unterminated. They are rebuilt from Playwright's documented placeholder
+    # classes (pageNumber / totalPages); confirm the intended styling and
+    # the intended footer content.
+    header_template = (
+        '<div style="font-size: 10px; width: 100%; text-align: right; '
+        'padding-right: 50px;">'
+        '<span class="pageNumber"></span> of <span class="totalPages"></span>'
+        '</div>'
+    )
+    footer_template = '<div style="font-size: 10px; width: 100%;"></div>'
+
+    default_format = {
+        'format': 'A4',
+        'margin': {
+            'top': '50px',
+            'right': '50px',
+            'bottom': '50px',
+            'left': '50px',
+        },
+        'print_background': True,
+        'display_header_footer': True,
+        'header_template': header_template,
+        'footer_template': footer_template,
+    }
+
+    pdf_options = format_options or default_format
+
+    try:
+        # The context manager stops the Playwright driver on every exit
+        # path, including a failure while launching the browser.
+        with sync_playwright() as playwright:
+            browser = playwright.chromium.launch()
+            try:
+                page = browser.new_context().new_page()
+
+                # Fixed viewport so layout is the same on every machine.
+                page.set_viewport_size({"width": 1280, "height": 1024})
+
+                # Large documents can take a while to lay out: 2 minutes.
+                page.set_default_timeout(120000)
+
+                # Returns only once the network has gone idle, so no
+                # further load-state waits are needed before printing.
+                page.set_content(html_content, wait_until='networkidle')
+
+                page.pdf(path=output_pdf, **pdf_options)
+            finally:
+                # Closing the browser also disposes of its contexts and pages.
+                browser.close()
+    except Exception as e:
+        raise DocumentationProcessingError(f"Error generating PDF: {e}") from e
+
+def main():
+    """Build the Astro documentation PDF and report the outcome.
+
+    Ensures ``styles.css`` exists, fetches the docs repository through
+    ``clone_repo``, converts the sorted documentation files to HTML, prepends
+    a cover page, appends the license page and renders the result to a dated
+    PDF in the current working directory.
+
+    Returns:
+        int: 0 on success, 1 if any step failed (the error is printed).
+    """
+    repo_dir = "astro-docs"
+    repo_url = "https://github.com/withastro/docs.git"
+    branch = "main"
+    docs_dir = "src/content/docs/en"
+
+    # Take the date once so the file name and the cover page always agree,
+    # even when a run crosses midnight.
+    today = datetime.now().strftime('%Y-%m-%d')
+    output_pdf = f"Astro_Documentation_{today}.pdf"
+
+    try:
+        # Version and license banner required by the AGPL notice.
+        print("""
+Astro Documentation PDF Generator v1.0.0
+Copyright (C) 2024 PacNPal
+This program comes with ABSOLUTELY NO WARRANTY; for details see the LICENSE file.
+This is free software, and you are welcome to redistribute it
+under certain conditions; see the LICENSE file for details.
+""")
+        # Create CSS if it doesn't exist
+        if not os.path.exists('styles.css'):
+            print("Creating default styles.css...")
+            create_default_css()
+            print("Default CSS file created.")
+
+        # Clone repository
+        print("Cloning Astro documentation repository...")
+        clone_repo(repo_url, branch, docs_dir, repo_dir)
+
+        # Get and sort files
+        print("Finding and sorting documentation files...")
+        docs_dir_full_path = os.path.join(repo_dir, docs_dir)
+        files_to_process = get_files_sorted(docs_dir_full_path)
+        print(f"Found {len(files_to_process)} files to process")
+
+        # The stylesheet is inlined into the cover page.
+        with open('styles.css', 'r', encoding='utf8') as f:
+            css_content = f.read()
+
+        # NOTE(review): the cover-page markup was missing from this literal
+        # (only the two text lines survived). The structure below is a
+        # reconstruction; confirm element names and classes against
+        # styles.css.
+        cover_html = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <meta charset="utf-8">
+            <style>
+            {css_content}
+            </style>
+        </head>
+        <body>
+            <div class="cover">
+                <h1>Astro Documentation</h1>
+                <p>Generated on {today}</p>
+            </div>
+        </body>
+        </html>
+        """
+
+        # Process files and generate HTML
+        print("Processing documentation files...")
+        html_content = process_files(files_to_process, repo_dir, docs_dir)
+
+        # NOTE(review): the separator between cover and content was also
+        # lost; an explicit page break is assumed here. Confirm.
+        page_break = '<div style="page-break-after: always;"></div>'
+        final_html = add_license_page(f"{cover_html}{page_break}{html_content}")
+
+        # Generate PDF
+        print(f"Generating PDF: {output_pdf}")
+        generate_pdf(final_html, output_pdf)
+
+        print(f"Documentation successfully generated: {output_pdf}")
+
+    except DocumentationProcessingError as e:
+        print(f"Error: {e}")
+        return 1
+    except Exception as e:
+        # Last-resort boundary: report anything unexpected and exit non-zero
+        # instead of dumping a traceback on the user.
+        print(f"Unexpected error: {e}")
+        print("This program is licensed under AGPL-3.0. Source code is available at: https://github.com/pacnpal/Docs-Exporter")
+        return 1
+
+    return 0
+
+if __name__ == "__main__":
+    # Raise SystemExit directly: the bare ``exit`` helper is injected by the
+    # ``site`` module for interactive sessions and is not guaranteed to be
+    # available (for example under ``python -S`` or in frozen builds).
+    raise SystemExit(main())
diff --git a/export-docs.py b/export-docs.py
deleted file mode 100644
index fa9a7a6..0000000
--- a/export-docs.py
+++ /dev/null
@@ -1,440 +0,0 @@
-"""
-Nextjs Documentation PDF Generator
-Modified version of Docs-Exporter for Astro documentation
-
-Original work Copyright (C) 2024 Riyooo
-Modified work Copyright (C) 2024 PacNPal
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see
.
-
-Modifications:
-- Replaced wkhtmltopdf with Playwright for PDF generation
-- Improved error handling and reporting
-- Added support for custom headers, footers, and styles in the generated PDFs.
-- Enhanced error handling with `try-except` blocks, especially for frontmatter parsing and file operations.
-"""
-
-import os
-import markdown
-import tempfile
-import yaml
-import re
-import html
-from git import Repo, RemoteProgress
-from datetime import datetime
-from packaging import version
-from tqdm import tqdm
-from playwright.sync_api import sync_playwright
-
-def get_license_notice():
- """Return a formatted license notice for inclusion in output"""
- return """
-This PDF was generated by Nextjs Documentation PDF Generator
-Original work Copyright (C) 2024 Riyooo
-Modified work Copyright (C) 2024 PacNPal
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License version 3.
-Source code is available at: https://github.com/pacnpal/Docs-Exporter
-"""
-
-def add_license_page(html_content):
- """Add a license notice page to the HTML content"""
- license_html = f"""
-
-
License Notice
-
- {get_license_notice()}
-
-
Complete source code for this program is available at: https://github.com/pacnpal/Docs-Exporter
-
This program comes with ABSOLUTELY NO WARRANTY. This is free software, and you
- are welcome to redistribute it under certain conditions. See the GNU Affero General
- Public License version 3 for details.
-
- """
- return html_content + license_html
-
-def process_image_paths(md_content):
- # Define a regular expression pattern to find image tags
- pattern = r'src(?:Light|Dark)="(.*?)"'
-
- # Function to replace the relative path with an absolute path
- def replace(match):
- relative_path = match.group(1)
- absolute_path = f'{base_path}{relative_path}{path_args}'
-
- # Print the original and new image URLs for debugging
- return f'src="{absolute_path}"'
-
- # Use the sub method to replace all occurrences
- return re.sub(pattern, replace, md_content)
-
-
-def preprocess_code_blocks(md_content):
- # Regular expression to match extended code blocks with filename and language
- pattern = r'```(\w+)?\s+filename="([^"]+)"\s*(switcher)?\n(.*?)```'
-
- def replace(match):
- language = match.group(1) if match.group(1) else ''
- filename = match.group(2)
- code_block = match.group(4)
-
- # Format the header with filename and language
- header = f'' if language else f''
-
- return f'{header}\n```{language}\n{code_block}\n```'
-
- # Replace all occurrences in the content
- return re.sub(pattern, replace, md_content, flags=re.DOTALL)
-
-
-def safe_load_frontmatter(frontmatter_content):
- try:
- return yaml.safe_load(frontmatter_content)
- except yaml.YAMLError:
- return None
-
-
-def preprocess_mdx_content(md_content):
- # Replace HTML tags in frontmatter
- md_content = re.sub(r'<(/?\w+)>', lambda m: html.escape(m.group(0)), md_content)
- return md_content
-
-
-def parse_frontmatter(md_content):
- lines = md_content.split('\n')
- if lines[0].strip() == '---':
- end_of_frontmatter = lines.index('---', 1)
- frontmatter = '\n'.join(lines[1:end_of_frontmatter])
- content = '\n'.join(lines[end_of_frontmatter + 1:])
- return frontmatter, content
- return None, md_content
-
-
-class CloneProgress(RemoteProgress):
- def __init__(self):
- super().__init__()
- self.pbar = tqdm()
-
- def update(self, op_code, cur_count, max_count=None, message=''):
- if max_count is not None:
- self.pbar.total = max_count
- self.pbar.update(cur_count - self.pbar.n)
-
- def finalize(self):
- self.pbar.close()
-
-
-def clone_repo(repo_url, branch, docs_dir, repo_dir):
- if not os.path.isdir(repo_dir):
- os.makedirs(repo_dir, exist_ok=True)
- print("Cloning repository...")
- repo = Repo.init(repo_dir)
- with repo.config_writer() as git_config:
- git_config.set_value("core", "sparseCheckout", "true")
-
- with open(os.path.join(repo_dir, ".git/info/sparse-checkout"), "w") as sparse_checkout_file:
- sparse_checkout_file.write(f"/{docs_dir}\n")
-
- origin = repo.create_remote("origin", repo_url)
- origin.fetch(progress=CloneProgress())
- repo.git.checkout(branch)
- print("Repository cloned.")
- else:
- print("Repository already exists. Updating...")
- repo = Repo(repo_dir)
- origin = repo.remotes.origin
- origin.fetch(progress=CloneProgress())
- repo.git.checkout(branch)
- origin.pull(progress=CloneProgress())
- print("Repository updated.")
-
-
-def is_file_open(file_path):
- if not os.path.exists(file_path):
- return False
- try:
- with open(file_path, 'a'):
- pass
- return False
- except PermissionError:
- return True
-
-
-def get_files_sorted(root_dir):
- all_files = []
- for root, _, files in os.walk(root_dir):
- for file in files:
- full_path = os.path.join(root, file)
- modified_basename = '!!!' + file if file in ['index.mdx', 'index.md'] else file
- sort_key = os.path.join(root, modified_basename)
- all_files.append((full_path, sort_key))
- all_files.sort(key=lambda x: x[1])
- return [full_path for full_path, _ in all_files]
-
-
-def preprocess_frontmatter(frontmatter):
- html_tags = {}
-
- def replace_tag(match):
- tag = match.group(0)
- placeholder = f"HTML_TAG_{len(html_tags)}"
- html_tags[placeholder] = tag
- return placeholder
-
- modified_frontmatter = re.sub(r'<[^>]+>', replace_tag, frontmatter)
- return modified_frontmatter, html_tags
-
-
-def restore_html_tags(parsed_data, html_tags):
- if isinstance(parsed_data, dict):
- for key, value in parsed_data.items():
- if isinstance(value, str):
- for placeholder, tag in html_tags.items():
- value = value.replace(placeholder, tag)
- value = html.escape(value)
- parsed_data[key] = value
- return parsed_data
-
-
-def process_files(files, repo_dir, docs_dir):
- toc = ""
- html_all_pages_content = ""
-
- html_header = f"""
-
-
-
-
-
- """
-
- numbering = [0]
-
- for index, file_path in enumerate(files):
- with open(file_path, 'r', encoding='utf8') as f:
- md_content = f.read()
-
- if Change_img_url:
- md_content = process_image_paths(md_content)
-
- md_content = preprocess_code_blocks(md_content)
- frontmatter, md_content = parse_frontmatter(md_content)
-
- if frontmatter:
- frontmatter, html_tags = preprocess_frontmatter(frontmatter)
- data = safe_load_frontmatter(frontmatter)
- if data is not None:
- data = restore_html_tags(data, html_tags)
- rel_path = os.path.relpath(file_path, os.path.join(repo_dir, docs_dir))
- depth = rel_path.count(os.sep)
- file_basename = os.path.basename(file_path)
- if file_basename.startswith("index.") and depth > 0:
- depth += -1
- indent = ' ' * 5 * depth
-
- while len(numbering) <= depth:
- numbering.append(0)
-
- numbering[depth] += 1
-
- for i in range(depth + 1, len(numbering)):
- numbering[i] = 0
-
- toc_numbering = f"{'.'.join(map(str, numbering[:depth + 1]))}"
- toc_title = data.get('title', os.path.splitext(os.path.basename(file_path))[0].title())
- toc_full_title = f"{toc_numbering} - {toc_title}"
- toc += f"{indent}
{toc_full_title}"
-
- html_page_content = f"""
-
{toc_full_title}
-
Documentation path: {file_path.replace(chr(92),'/').replace('.mdx', '').replace(repo_dir + '/' + docs_dir,'')}
-
Description: {data.get('description', 'No description')}
- """
- if data.get('related', {}):
- html_page_content += f"""
-
-
Related:
-
Title: {data.get('related', {}).get('title', 'Related')}
-
Related Description: {data.get('related', {}).get('description', 'No related description')}
-
Links:
-
- {''.join([f'- {link}
' for link in data.get('related', {}).get('links', [])])}
-
-
- """
- html_page_content += ''
- else:
- html_page_content = ""
- else:
- html_page_content = ""
-
- html_page_content += markdown.markdown(md_content, extensions=['fenced_code', 'codehilite', 'tables', 'footnotes', 'toc', 'abbr', 'attr_list', 'def_list', 'smarty', 'admonition'])
- html_all_pages_content += html_page_content
-
- if index < len(files) - 1:
- html_all_pages_content += '
'
-
- toc_html = f"""
"""
- html_all_content = toc_html + html_all_pages_content
-
- html_all_pages_content = html_header + html_all_pages_content + ""
- toc_html = html_header + toc_html + "