<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic speed issue DBR 13+ for R in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/speed-issue-dbr-13-for-r/m-p/40397#M27193</link>
    <description>&lt;P&gt;I have a notebook running on DBR 12.2 with the following R code:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;install.packages("microbenchmark") 
install.packages("furrr") 

library(microbenchmark) 
library(tidyverse) 
# example tibble 
df_test &amp;lt;- tibble(id = 1:100000, street_raw = rep("Bahnhofstrasse 12", 100000)) 

# function 
test_fc &amp;lt;- function(str = "") { 
value = str_to_lower(str) 
value = str_replace_all(value, pattern="n", replacement="N") 
value = str_replace_all(value, pattern="h", replacement="H") 
return(value) } 

# single core with purrr package 
microbenchmark(df_test %&amp;gt;% mutate(street_all = map_chr(street_raw, test_fc)), times = 10) 

# DBR 12.2 / median 9.300949 seconds 
# DBR 13.2 / median 16.04199 seconds 

# multi core with furrr package 
library(furrr) 
plan(multisession) 

microbenchmark(df_test %&amp;gt;% mutate(street_all = future_map_chr(street_raw, test_fc)), times = 10) 

# DBR 12.2 / median 1.861389 seconds 
# DBR 13.2 / median 2.781327 seconds &lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;With a cluster (Standard_F8s, 16GB RAM, 8 cores) on DBR 12.2, I got the following results:&lt;BR /&gt;single core 9.30s / multi core 1.86s&lt;BR /&gt;&lt;BR /&gt;With the same cluster on DBR 13.2, I got the following results:&lt;BR /&gt;single core 16.04s / multi core 2.78s&lt;BR /&gt;&lt;BR /&gt;Can anyone give me some advice on how to speed this up on DBR 13+, or is it slower in general?&lt;/P&gt;</description>
    <pubDate>Fri, 18 Aug 2023 06:27:30 GMT</pubDate>
    <dc:creator>romangehrn</dc:creator>
    <dc:date>2023-08-18T06:27:30Z</dc:date>
    <item>
      <title>speed issue DBR 13+ for R</title>
      <link>https://community.databricks.com/t5/data-engineering/speed-issue-dbr-13-for-r/m-p/40397#M27193</link>
      <description>&lt;P&gt;I have a notebook running on DBR 12.2 with the following R code:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;install.packages("microbenchmark") 
install.packages("furrr") 

library(microbenchmark) 
library(tidyverse) 
# example tibble 
df_test &amp;lt;- tibble(id = 1:100000, street_raw = rep("Bahnhofstrasse 12", 100000)) 

# function 
test_fc &amp;lt;- function(str = "") { 
value = str_to_lower(str) 
value = str_replace_all(value, pattern="n", replacement="N") 
value = str_replace_all(value, pattern="h", replacement="H") 
return(value) } 

# single core with purrr package 
microbenchmark(df_test %&amp;gt;% mutate(street_all = map_chr(street_raw, test_fc)), times = 10) 

# DBR 12.2 / median 9.300949 seconds 
# DBR 13.2 / median 16.04199 seconds 

# multi core with furrr package 
library(furrr) 
plan(multisession) 

microbenchmark(df_test %&amp;gt;% mutate(street_all = future_map_chr(street_raw, test_fc)), times = 10) 

# DBR 12.2 / median 1.861389 seconds 
# DBR 13.2 / median 2.781327 seconds &lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;With a cluster (Standard_F8s, 16GB RAM, 8 cores) on DBR 12.2, I got the following results:&lt;BR /&gt;single core 9.30s / multi core 1.86s&lt;BR /&gt;&lt;BR /&gt;With the same cluster on DBR 13.2, I got the following results:&lt;BR /&gt;single core 16.04s / multi core 2.78s&lt;BR /&gt;&lt;BR /&gt;Can anyone give me some advice on how to speed this up on DBR 13+, or is it slower in general?&lt;/P&gt;</description>
      <pubDate>Fri, 18 Aug 2023 06:27:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/speed-issue-dbr-13-for-r/m-p/40397#M27193</guid>
      <dc:creator>romangehrn</dc:creator>
      <dc:date>2023-08-18T06:27:30Z</dc:date>
    </item>
  </channel>
</rss>

