<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Issue with VSCode Extension and Databricks Cluster Using Docker Image in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/issue-with-vscode-extension-and-databricks-cluster-using-docker/m-p/80009#M7870</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/9"&gt;@Retired_mod&lt;/a&gt;&amp;nbsp;thanks for such a quick response.&lt;/P&gt;&lt;P&gt;Actually, I am using the Dockerfile from the Databricks runtime example here:&amp;nbsp;&lt;A href="https://github.com/databricks/containers/blob/master/ubuntu/minimal/Dockerfile" target="_self"&gt;https://github.com/databricks/containers/blob/master/ubuntu/minimal/Dockerfile&lt;/A&gt;&amp;nbsp;. The configuration with the VSCode extensions is fine since I already mentioned that the "upload and run python file" command works with a standard cluster.&lt;/P&gt;&lt;P&gt;This is my Dockerfile:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;# This Dockerfile creates a clean Databricks runtime 12.2 LTS without any library ready to deploy to Databricks
FROM databricksruntime/minimal:14.3-LTS
# These are the versions compatible for DBR 12.x

ARG python_version="3.9"
ARG pip_version="22.3.1"
ARG setuptools_version="65.6.3"
ARG wheel_version="0.38.4"
ARG virtualenv_version="20.16.7"

# Set the debconf frontend to Noninteractive
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

# Installs python 3.x and virtualenv for Spark and Notebooks
RUN sudo apt-get update &amp;amp;&amp;amp; sudo apt-get install dialog apt-utils curl build-essential fuse openssh-server software-properties-common --yes \
    &amp;amp;&amp;amp; sudo add-apt-repository ppa:deadsnakes/ppa -y &amp;amp;&amp;amp; sudo apt-get update \
    &amp;amp;&amp;amp; sudo apt-get install python${python_version} python${python_version}-dev python${python_version}-distutils --yes \
    &amp;amp;&amp;amp; curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
    &amp;amp;&amp;amp; /usr/bin/python${python_version} get-pip.py pip&amp;gt;=${pip_version} setuptools&amp;gt;=${setuptools_version} wheel&amp;gt;=${wheel_version} \
    &amp;amp;&amp;amp; rm get-pip.py \
    &amp;amp;&amp;amp; apt-get clean \
    &amp;amp;&amp;amp; rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

RUN /usr/local/bin/pip${python_version} install --no-cache-dir virtualenv==${virtualenv_version} \
    &amp;amp;&amp;amp; sed -i -r 's/^(PERIODIC_UPDATE_ON_BY_DEFAULT) = True$/\1 = False/' /usr/local/lib/python${python_version}/dist-packages/virtualenv/seed/embed/base_embed.py \
    &amp;amp;&amp;amp; /usr/local/bin/pip${python_version} download pip==${pip_version} --dest \
    /usr/local/lib/python${python_version}/dist-packages/virtualenv_support/

# Initialize the default environment that Spark and notebooks will use
RUN virtualenv --python=python${python_version} --system-site-packages /databricks/python3 --no-download  --no-setuptools

# These python libraries are used by Databricks notebooks and the Python REPL
# You do not need to install pyspark - it is injected when the cluster is launched
# Versions are intended to reflect latest DBR: https://docs.databricks.com/release-notes/runtime/11.1.html#system-environment
RUN /databricks/python3/bin/pip install \
    six&amp;gt;=1.16.0 \
    jedi&amp;gt;=0.18.1 \
    # ensure minimum ipython version for Python autocomplete with jedi 0.17.x
    ipython&amp;gt;=8.10.0 \
    pyarrow&amp;gt;=8.0.0 \
    ipykernel&amp;gt;=6.17.1 \
    grpcio&amp;gt;=1.48.1 \
    grpcio-status&amp;gt;=1.48.1 \
    databricks-sdk&amp;gt;=0.1.6

# Specifies where Spark will look for the python process
ENV PYSPARK_PYTHON=/databricks/python3/bin/python3
# Specifies Tracking URI for MLflow Integration
ENV MLFLOW_TRACKING_URI='databricks'
# Make sure the USER env variable is set. The files exposed
# by dbfs-fuse will be owned by this user.
# Within the container, the USER is always root.
ENV USER root&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 22 Jul 2024 20:41:42 GMT</pubDate>
    <dc:creator>danmlopsmaz</dc:creator>
    <dc:date>2024-07-22T20:41:42Z</dc:date>
    <item>
      <title>Issue with VSCode Extension and Databricks Cluster Using Docker Image</title>
      <link>https://community.databricks.com/t5/get-started-discussions/issue-with-vscode-extension-and-databricks-cluster-using-docker/m-p/79550#M7868</link>
      <description>&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;P&gt;I've encountered a significant issue while using the VSCode extension for Databricks, particularly when working with a cluster configured with a Docker image. Here's a detailed description of the problem:&lt;/P&gt;&lt;H3&gt;Problem Description&lt;/H3&gt;&lt;P&gt;When attempting to upload and execute a Python file with VSCode to a Databricks cluster that utilizes a custom Docker image, the connection fails, and the extension does not function as expected.&lt;/P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="danmlopsmaz_0-1721482196211.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/9724i3DD4F408CDD65E4B/image-size/medium/is-moderation-mode/true?v=v2&amp;amp;px=400" role="button" title="danmlopsmaz_0-1721482196211.png" alt="danmlopsmaz_0-1721482196211.png" /&gt;&lt;/span&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;==============================
Errors in 00-databricks-init-3331c3ed293013bfec5837e683d00cfe.py:
 
WARNING -  All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00 - 00: 1721481367.546267  105941 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache&lt;/LI-CODE&gt;&lt;LI-CODE lang="markup"&gt;Error: CommandExecution.createAndWait: failed to reach Running state, got Error: [object Object]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="danmlopsmaz_1-1721482508346.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/9725i4A98CE8CB85433B2/image-size/medium/is-moderation-mode/true?v=v2&amp;amp;px=400" role="button" title="danmlopsmaz_1-1721482508346.png" alt="danmlopsmaz_1-1721482508346.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;7/20/2024, 8:34:11 AM - Creating execution context on cluster 0719 ...
Error: CommandExecution.createAndWait: failed to reach Running state, got Error: [object Object]
Execution terminated&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;H3&gt;Potential Workarounds&lt;/H3&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Databricks connect&lt;/STRONG&gt;: Run the databricks connect in a terminal works to execute the spark code in the cluster. But, the VS Code extension does not.&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;H3&gt;Note&lt;/H3&gt;&lt;P&gt;It is important to mention that when I run the same Python file with a standard cluster with no docker on it, the VSCode extension works as expected.&lt;/P&gt;</description>
      <pubDate>Sat, 20 Jul 2024 13:51:43 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/issue-with-vscode-extension-and-databricks-cluster-using-docker/m-p/79550#M7868</guid>
      <dc:creator>danmlopsmaz</dc:creator>
      <dc:date>2024-07-20T13:51:43Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with VSCode Extension and Databricks Cluster Using Docker Image</title>
      <link>https://community.databricks.com/t5/get-started-discussions/issue-with-vscode-extension-and-databricks-cluster-using-docker/m-p/80009#M7870</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/9"&gt;@Retired_mod&lt;/a&gt;&amp;nbsp;thanks for such a quick response.&lt;/P&gt;&lt;P&gt;Actually, I am using the Dockerfile from the Databricks runtime example here:&amp;nbsp;&lt;A href="https://github.com/databricks/containers/blob/master/ubuntu/minimal/Dockerfile" target="_self"&gt;https://github.com/databricks/containers/blob/master/ubuntu/minimal/Dockerfile&lt;/A&gt;&amp;nbsp;. The configuration with the VSCode extensions is fine since I already mentioned that the "upload and run python file" command works with a standard cluster.&lt;/P&gt;&lt;P&gt;This is my Dockerfile:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;# This Dockerfile creates a clean Databricks runtime 12.2 LTS without any library ready to deploy to Databricks
FROM databricksruntime/minimal:14.3-LTS
# These are the versions compatible for DBR 12.x

ARG python_version="3.9"
ARG pip_version="22.3.1"
ARG setuptools_version="65.6.3"
ARG wheel_version="0.38.4"
ARG virtualenv_version="20.16.7"

# Set the debconf frontend to Noninteractive
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

# Installs python 3.x and virtualenv for Spark and Notebooks
RUN sudo apt-get update &amp;amp;&amp;amp; sudo apt-get install dialog apt-utils curl build-essential fuse openssh-server software-properties-common --yes \
    &amp;amp;&amp;amp; sudo add-apt-repository ppa:deadsnakes/ppa -y &amp;amp;&amp;amp; sudo apt-get update \
    &amp;amp;&amp;amp; sudo apt-get install python${python_version} python${python_version}-dev python${python_version}-distutils --yes \
    &amp;amp;&amp;amp; curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
    &amp;amp;&amp;amp; /usr/bin/python${python_version} get-pip.py pip&amp;gt;=${pip_version} setuptools&amp;gt;=${setuptools_version} wheel&amp;gt;=${wheel_version} \
    &amp;amp;&amp;amp; rm get-pip.py \
    &amp;amp;&amp;amp; apt-get clean \
    &amp;amp;&amp;amp; rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

RUN /usr/local/bin/pip${python_version} install --no-cache-dir virtualenv==${virtualenv_version} \
    &amp;amp;&amp;amp; sed -i -r 's/^(PERIODIC_UPDATE_ON_BY_DEFAULT) = True$/\1 = False/' /usr/local/lib/python${python_version}/dist-packages/virtualenv/seed/embed/base_embed.py \
    &amp;amp;&amp;amp; /usr/local/bin/pip${python_version} download pip==${pip_version} --dest \
    /usr/local/lib/python${python_version}/dist-packages/virtualenv_support/

# Initialize the default environment that Spark and notebooks will use
RUN virtualenv --python=python${python_version} --system-site-packages /databricks/python3 --no-download  --no-setuptools

# These python libraries are used by Databricks notebooks and the Python REPL
# You do not need to install pyspark - it is injected when the cluster is launched
# Versions are intended to reflect latest DBR: https://docs.databricks.com/release-notes/runtime/11.1.html#system-environment
RUN /databricks/python3/bin/pip install \
    six&amp;gt;=1.16.0 \
    jedi&amp;gt;=0.18.1 \
    # ensure minimum ipython version for Python autocomplete with jedi 0.17.x
    ipython&amp;gt;=8.10.0 \
    pyarrow&amp;gt;=8.0.0 \
    ipykernel&amp;gt;=6.17.1 \
    grpcio&amp;gt;=1.48.1 \
    grpcio-status&amp;gt;=1.48.1 \
    databricks-sdk&amp;gt;=0.1.6

# Specifies where Spark will look for the python process
ENV PYSPARK_PYTHON=/databricks/python3/bin/python3
# Specifies Tracking URI for MLflow Integration
ENV MLFLOW_TRACKING_URI='databricks'
# Make sure the USER env variable is set. The files exposed
# by dbfs-fuse will be owned by this user.
# Within the container, the USER is always root.
ENV USER root&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 22 Jul 2024 20:41:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/issue-with-vscode-extension-and-databricks-cluster-using-docker/m-p/80009#M7870</guid>
      <dc:creator>danmlopsmaz</dc:creator>
      <dc:date>2024-07-22T20:41:42Z</dc:date>
    </item>
  </channel>
</rss>

