Can you try providing the mandatory parameters to the Bucketizer? Even though the docs describe them as optional, I see that it works when I provide the parameters splits, inputCol, and outputCol.
"""Demonstrate pyspark's Bucketizer with its three key parameters supplied.

Although the API signature lists ``splits``, ``inputCol`` and ``outputCol``
as optional keyword arguments, the transformer only works in practice once
all three are set: ``splits`` defines the bucket boundaries, and the
input/output columns tell it which column to read and which to write.
"""
from pyspark.sql import SparkSession
from pyspark.ml.feature import Bucketizer

# Initialize SparkSession with error handling; fail loudly if the runtime
# is unavailable rather than continuing with an unusable session.
try:
    spark = SparkSession.builder.appName("BucketizerTest").getOrCreate()
    print(f"Spark version: {spark.version}")  # Verify SparkSession
except Exception as e:
    print(f"Failed to initialize SparkSession: {e}")
    raise

try:
    # Sample (id, value) rows chosen so every bucket receives one row.
    data = [(1, -0.5), (2, 0.0), (3, 1.5), (4, 3.0)]
    df = spark.createDataFrame(data, ["id", "value"])

    # Bucket boundaries: [-inf, 0), [0, 1), [1, 2), [2, +inf) -> buckets 0..3.
    splits = [float("-inf"), 0.0, 1.0, 2.0, float("inf")]

    # Initialize Bucketizer with the required parameters (splits, inputCol,
    # outputCol) — all three must be provided even though the docs mark
    # them optional.
    try:
        bucketizer = Bucketizer(
            splits=splits,
            inputCol="value",
            outputCol="bucket"
        )
        # Apply Bucketizer to DataFrame and print the bucketed rows.
        bucketed_df = bucketizer.transform(df)
        bucketed_df.show()
    except Exception as e:
        print(f"Error with Bucketizer: {e}")
        raise
finally:
    # Always release the session; leaving it running leaks the backing JVM
    # and cluster resources (the original left spark.stop() commented out).
    spark.stop()