<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Bookmark in pdf in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/bookmark-in-pdf/m-p/99860#M40116</link>
    <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/133228"&gt;@Sudic29&lt;/a&gt;&amp;nbsp;can you please share more about what you have implemented so far?&lt;/P&gt;
&lt;P&gt;This requires dynamically tracking the page number during the PDF creation process. Example in Python:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;from PyPDF2 import PdfReader, PdfWriter

def add_bookmarks_to_pdf(input_path, output_path, bookmarks):
    """Copy a PDF and add an outline entry (bookmark) for selected pages.

    Args:
        input_path: Path of the PDF to read.
        output_path: Path the bookmarked copy is written to.
        bookmarks: Mapping of zero-based page index to bookmark title.
            Indices that do not match a page of the input are ignored.
    """
    reader = PdfReader(input_path)
    writer = PdfWriter()

    # Copy every page; when a page has a title, attach an outline item that
    # targets that page's own index so each bookmark opens its own page.
    for page_index, page in enumerate(reader.pages):
        writer.add_page(page)
        if page_index in bookmarks:
            # Outline items are what PDF viewers list as bookmarks; a named
            # destination on its own is not shown in the bookmarks panel.
            writer.add_outline_item(bookmarks[page_index], page_index)

    # Write the output PDF with bookmarks
    with open(output_path, "wb") as f:
        writer.write(f)

# Bookmark titles listed in page order; enumerate() pairs each title with its
# zero-based page index: {0: "Table 1", 1: "Table 2", 2: "Table 3"}.
bookmarks = dict(enumerate(("Table 1", "Table 2", "Table 3")))
add_bookmarks_to_pdf(input_path="input.pdf", output_path="output.pdf", bookmarks=bookmarks)
&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;What have you accomplished in PySpark?&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sat, 23 Nov 2024 18:01:09 GMT</pubDate>
    <dc:creator>VZLA</dc:creator>
    <dc:date>2024-11-23T18:01:09Z</dc:date>
    <item>
      <title>Bookmark in pdf</title>
      <link>https://community.databricks.com/t5/data-engineering/bookmark-in-pdf/m-p/99572#M40031</link>
      <description>&lt;P&gt;I am creating a PDF using PySpark and trying to add a bookmark for each table on its pages. All the bookmarks end up pointing to the first table on the first page. Please help me out here.&lt;/P&gt;</description>
      <pubDate>Thu, 21 Nov 2024 05:20:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/bookmark-in-pdf/m-p/99572#M40031</guid>
      <dc:creator>Sudic29</dc:creator>
      <dc:date>2024-11-21T05:20:07Z</dc:date>
    </item>
    <item>
      <title>Re: Bookmark in pdf</title>
      <link>https://community.databricks.com/t5/data-engineering/bookmark-in-pdf/m-p/99860#M40116</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/133228"&gt;@Sudic29&lt;/a&gt;&amp;nbsp;can you please share more about what you have implemented so far?&lt;/P&gt;
&lt;P&gt;This requires dynamically tracking the page number during the PDF creation process. Example in Python:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;from PyPDF2 import PdfReader, PdfWriter

def add_bookmarks_to_pdf(input_path, output_path, bookmarks):
    """Copy a PDF and add an outline entry (bookmark) for selected pages.

    Args:
        input_path: Path of the PDF to read.
        output_path: Path the bookmarked copy is written to.
        bookmarks: Mapping of zero-based page index to bookmark title.
            Indices that do not match a page of the input are ignored.
    """
    reader = PdfReader(input_path)
    writer = PdfWriter()

    # Copy every page; when a page has a title, attach an outline item that
    # targets that page's own index so each bookmark opens its own page.
    for page_index, page in enumerate(reader.pages):
        writer.add_page(page)
        if page_index in bookmarks:
            # Outline items are what PDF viewers list as bookmarks; a named
            # destination on its own is not shown in the bookmarks panel.
            writer.add_outline_item(bookmarks[page_index], page_index)

    # Write the output PDF with bookmarks
    with open(output_path, "wb") as f:
        writer.write(f)

# Bookmark titles listed in page order; enumerate() pairs each title with its
# zero-based page index: {0: "Table 1", 1: "Table 2", 2: "Table 3"}.
bookmarks = dict(enumerate(("Table 1", "Table 2", "Table 3")))
add_bookmarks_to_pdf(input_path="input.pdf", output_path="output.pdf", bookmarks=bookmarks)
&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;What have you accomplished in PySpark?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 23 Nov 2024 18:01:09 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/bookmark-in-pdf/m-p/99860#M40116</guid>
      <dc:creator>VZLA</dc:creator>
      <dc:date>2024-11-23T18:01:09Z</dc:date>
    </item>
  </channel>
</rss>

