<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: pyspark - regexp_extract in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20947#M14195</link>
    <description>&lt;P&gt;\s*\[[^)]*\]  removes the square brackets and everything inside it and the space too (well actually substitutes it with nothing).&lt;/P&gt;&lt;P&gt;&lt;A href="https://regex101.com/r/tv9pbJ/1" target="test_blank"&gt;https://regex101.com/r/tv9pbJ/1&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Haven't checked if spark can do regex substitution.&lt;/P&gt;</description>
    <pubDate>Thu, 24 Nov 2022 15:36:04 GMT</pubDate>
    <dc:creator>-werners-</dc:creator>
    <dc:date>2022-11-24T15:36:04Z</dc:date>
    <item>
      <title>pyspark - regexp_extract</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20942#M14190</link>
      <description>&lt;P&gt;Hello everyone, I'm creating a regex expression to fetch only the numeric value from a string, but some values are negative. I am not able to create a rule that also captures the negative sign. Can you help me?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql.functions  import regexp_extract
from pyspark.sql.types import StructType,StructField, StringType
&amp;nbsp;
data = [("01","[$R$-pt-BR] 150.00"),
        ("02", "-[$R$-pt-BR] 379.52" ),
        ("03", "[$R$-pt-BR] 185.16" ),
        ("04", "[$R$-pt-BR] 185.16" ),]
&amp;nbsp;
schema = StructType([ \
    StructField("id",StringType(),True), \
    StructField("description",StringType(),True), 
  ])
&amp;nbsp;
df = spark.createDataFrame(data=data,schema=schema)
df.display()
&amp;nbsp;
df1 = df\
.withColumn("value", regexp_extract('description', r"[\d]{1,4}.[\d]{1,4}", 0))
df1.display()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1124i75A062EBA3D38E4F/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Nov 2022 17:41:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20942#M14190</guid>
      <dc:creator>weldermartins</dc:creator>
      <dc:date>2022-11-23T17:41:29Z</dc:date>
    </item>
    <item>
      <title>Re: pyspark - regexp_extract</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20943#M14191</link>
      <description>&lt;P&gt;@Werner Stinckens​&amp;nbsp;&lt;/P&gt;&lt;P&gt;can you help me?&lt;/P&gt;</description>
      <pubDate>Wed, 23 Nov 2022 18:50:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20943#M14191</guid>
      <dc:creator>weldermartins</dc:creator>
      <dc:date>2022-11-23T18:50:17Z</dc:date>
    </item>
    <item>
      <title>Re: pyspark - regexp_extract</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20944#M14192</link>
      <description>&lt;P&gt;Hi there, &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Create a column to catch the minus sign "-"; the pattern is: "^[\-]?"&lt;/LI&gt;&lt;LI&gt;Create a column to catch the digits, as you have already done.&lt;/LI&gt;&lt;LI&gt;Concatenate the two columns above.&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope it fits your requirement. &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 24 Nov 2022 08:59:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20944#M14192</guid>
      <dc:creator>NhatHoang</dc:creator>
      <dc:date>2022-11-24T08:59:04Z</dc:date>
    </item>
    <item>
      <title>Re: pyspark - regexp_extract</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20945#M14193</link>
      <description>&lt;P&gt;I found another solution, but I didn't want to give up on regex. If you find a way, be sure to post. Thanks.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1129iCBD773DC8767FCA5/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 24 Nov 2022 12:26:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20945#M14193</guid>
      <dc:creator>weldermartins</dc:creator>
      <dc:date>2022-11-24T12:26:51Z</dc:date>
    </item>
    <item>
      <title>Re: pyspark - regexp_extract</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20946#M14194</link>
      <description>&lt;PRE&gt;&lt;CODE&gt;df1 = df\
.withColumn("value", regexp_extract('description', "[\d]{1,4}.[\d]{1,4}", 0))\
.withColumn("operador", regexp_extract('description', "^[\-]?", 0))\
.withColumn("value2", concat("operador","value"))
df1.display()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image.png"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1126i251A044D213FC426/image-size/large?v=v2&amp;amp;px=999" role="button" title="image.png" alt="image.png" /&gt;&lt;/span&gt;@Nhat Hoang​&amp;nbsp;, Thanks.&lt;/P&gt;</description>
      <pubDate>Thu, 24 Nov 2022 13:46:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20946#M14194</guid>
      <dc:creator>weldermartins</dc:creator>
      <dc:date>2022-11-24T13:46:41Z</dc:date>
    </item>
    <item>
      <title>Re: pyspark - regexp_extract</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20947#M14195</link>
      <description>&lt;P&gt;\s*\[[^)]*\]  removes the square brackets and everything inside it and the space too (well actually substitutes it with nothing).&lt;/P&gt;&lt;P&gt;&lt;A href="https://regex101.com/r/tv9pbJ/1" target="test_blank"&gt;https://regex101.com/r/tv9pbJ/1&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Haven't checked if spark can do regex substitution.&lt;/P&gt;</description>
      <pubDate>Thu, 24 Nov 2022 15:36:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20947#M14195</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-11-24T15:36:04Z</dc:date>
    </item>
    <item>
      <title>Re: pyspark - regexp_extract</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20948#M14196</link>
      <description>&lt;P&gt;It's like you need to find this pattern: "^[\-]?"&lt;/P&gt;</description>
      <pubDate>Thu, 01 Dec 2022 11:25:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20948#M14196</guid>
      <dc:creator>mcwir</dc:creator>
      <dc:date>2022-12-01T11:25:27Z</dc:date>
    </item>
    <item>
      <title>Re: pyspark - regexp_extract</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20949#M14197</link>
      <description>&lt;P&gt;Have you found the answer? If you are a student in college or school searching for free essay examples online, you may want to visit the website &lt;A href="https://writinguniverse.com/free-essay-examples/soccer/" alt="https://writinguniverse.com/free-essay-examples/soccer/" target="_blank"&gt;https://writinguniverse.com/free-essay-examples/soccer/&lt;/A&gt;. Here you will find a vast collection of free essay examples related to various topics, including soccer. These essay examples can be valuable resources to help you complete your essay assignments.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Mar 2023 12:50:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-regexp-extract/m-p/20949#M14197</guid>
      <dc:creator>ErinArmistead</dc:creator>
      <dc:date>2023-03-16T12:50:31Z</dc:date>
    </item>
  </channel>
</rss>

