<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Job aborted stage failure java.sql.SQLRecoverableException: IO Error: Connection reset by peer in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/job-aborted-stage-failure-java-sql-sqlrecoverableexception-io/m-p/80098#M35928</link>
    <description>&lt;P&gt;Try using this code.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;import pyspark
from pyspark.sql import SparkSession

# Initialize Spark session
spark = SparkSession.builder.appName("OracleToDatabricks").getOrCreate()

# Oracle connection properties
conn = "jdbc:oracle:thin:@//&amp;lt;host&amp;gt;:&amp;lt;port&amp;gt;/&amp;lt;service_name&amp;gt;"
user = "&amp;lt;username&amp;gt;"
pwd = "&amp;lt;password&amp;gt;"
driver = "oracle.jdbc.driver.OracleDriver"
pQuery = "&amp;lt;table_name&amp;gt;"
lbound = 1
ubound = 1000000
batch_size = 10000

properties = {
    "user": user,
    "password": pwd,
    "driver": driver,
    "autoReconnect": "true",
    "numPartitions": "20",
    "partitionColumn": "PARTITION_KEY",
    "lowerBound": lbound,
    "upperBound": ubound,
    "fetchSize": "10000"
}

for i in range(lbound, ubound, batch_size):
    lower_bound = i
    upper_bound = min(i + batch_size - 1, ubound)
    query = f"(SELECT * FROM {pQuery} WHERE PARTITION_KEY &amp;gt;= {lower_bound} AND PARTITION_KEY &amp;lt;= {upper_bound}) AS TEMP"
    df = spark.read.jdbc(url=conn, table=query, properties=properties)
    # Process and write the data to ADLS
    df.write.mode("append").parquet("path/to/adls")&lt;/LI-CODE&gt;</description>
    <pubDate>Tue, 23 Jul 2024 09:19:24 GMT</pubDate>
    <dc:creator>Rishabh-Pandey</dc:creator>
    <dc:date>2024-07-23T09:19:24Z</dc:date>
    <item>
      <title>Job aborted stage failure java.sql.SQLRecoverableException: IO Error: Connection reset by peer</title>
      <link>https://community.databricks.com/t5/data-engineering/job-aborted-stage-failure-java-sql-sqlrecoverableexception-io/m-p/80095#M35926</link>
      <description>&lt;P&gt;While ingesting data from Oracle into Databricks (writing to ADLS) using JDBC, I am getting a "connection reset by peer" error when ingesting a large table that has millions of rows. I am using Oracle SQL Developer and Azure Databricks.&lt;/P&gt;&lt;P&gt;I have tried several approaches, including a partition column (lower and upper bounds), predicates, and incremental loading, but none of them work. Please help if anyone knows the solution.&lt;BR /&gt;&lt;SPAN&gt;jdbc(url=conn, table= pQuery, properties={ "user": user, "password": pwd, "driver": driver, "autoReconnect": "true", #"numPartitions": "50", "numPartitions": "20", "partitionColumn": "PARTITION_KEY", "lowerBound": lbound, "upperBound": ubound, #"fetchSize": "90000000" "fetchSize": "900000" } )&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 23 Jul 2024 09:09:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/job-aborted-stage-failure-java-sql-sqlrecoverableexception-io/m-p/80095#M35926</guid>
      <dc:creator>Sudharsan24</dc:creator>
      <dc:date>2024-07-23T09:09:26Z</dc:date>
    </item>
    <item>
      <title>Re: Job aborted stage failure java.sql.SQLRecoverableException: IO Error: Connection reset by peer</title>
      <link>https://community.databricks.com/t5/data-engineering/job-aborted-stage-failure-java-sql-sqlrecoverableexception-io/m-p/80098#M35928</link>
      <description>&lt;P&gt;Try using this code.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;import pyspark
from pyspark.sql import SparkSession

# Initialize Spark session
spark = SparkSession.builder.appName("OracleToDatabricks").getOrCreate()

# Oracle connection properties
conn = "jdbc:oracle:thin:@//&amp;lt;host&amp;gt;:&amp;lt;port&amp;gt;/&amp;lt;service_name&amp;gt;"
user = "&amp;lt;username&amp;gt;"
pwd = "&amp;lt;password&amp;gt;"
driver = "oracle.jdbc.driver.OracleDriver"
pQuery = "&amp;lt;table_name&amp;gt;"
lbound = 1
ubound = 1000000
batch_size = 10000

properties = {
    "user": user,
    "password": pwd,
    "driver": driver,
    "autoReconnect": "true",
    "numPartitions": "20",
    "partitionColumn": "PARTITION_KEY",
    "lowerBound": lbound,
    "upperBound": ubound,
    "fetchSize": "10000"
}

for i in range(lbound, ubound, batch_size):
    lower_bound = i
    upper_bound = min(i + batch_size - 1, ubound)
    query = f"(SELECT * FROM {pQuery} WHERE PARTITION_KEY &amp;gt;= {lower_bound} AND PARTITION_KEY &amp;lt;= {upper_bound}) AS TEMP"
    df = spark.read.jdbc(url=conn, table=query, properties=properties)
    # Process and write the data to ADLS
    df.write.mode("append").parquet("path/to/adls")&lt;/LI-CODE&gt;</description>
      <pubDate>Tue, 23 Jul 2024 09:19:24 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/job-aborted-stage-failure-java-sql-sqlrecoverableexception-io/m-p/80098#M35928</guid>
      <dc:creator>Rishabh-Pandey</dc:creator>
      <dc:date>2024-07-23T09:19:24Z</dc:date>
    </item>
    <item>
      <title>Re: Job aborted stage failure java.sql.SQLRecoverableException: IO Error: Connection reset by peer</title>
      <link>https://community.databricks.com/t5/data-engineering/job-aborted-stage-failure-java-sql-sqlrecoverableexception-io/m-p/80114#M35937</link>
      <description>&lt;P&gt;Thanks for the reply, but it's still failing with the same issue.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 23 Jul 2024 11:00:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/job-aborted-stage-failure-java-sql-sqlrecoverableexception-io/m-p/80114#M35937</guid>
      <dc:creator>Sudharsan24</dc:creator>
      <dc:date>2024-07-23T11:00:21Z</dc:date>
    </item>
  </channel>
</rss>

