I am trying to find the quarter start date from a date column. I get the expected result when I write it using selectExpr(), but when I use the same logic in .withColumn() I get TypeError: Column is not iterable.
I am first deriving the month start date (the first day of the month) using the following workaround:
workaround:- df=df.selectExpr('*',"date_sub(history_effective_date,dayofmonth(history_effective_date)-1) as history_effective_month")
selectExpr:- df.selectExpr("add_months(history_effective_month,-(month(history_effective_month)%3)+1) as qtr","history_effective_month").show(5)
output-
+----------+-----------------------+
|       qtr|history_effective_month|
+----------+-----------------------+
|2017-07-01|             2017-06-01|
|2016-04-01|             2016-05-01|
|2015-10-01|             2015-09-01|
|2012-01-01|             2012-01-01|
|2012-01-01|             2012-01-01|
+----------+-----------------------+
withColumn():-
df.withColumn("history_effective_quarter",add_months('history_effective_month',-(month('history_effective_month')%3)+1))
TypeError Traceback (most recent call last) <ipython-input-259-0bb78d27d2a7> in <module>() 1 #pricerxDF.selectExpr("add_months(history_effective_month,-(month(history_effective_month)%3)+1) as qtr","history_effective_month").show(5) ----> 2 pricerxDF.withColumn("history_effective_quarter",add_months('history_effective_month',-(month('history_effective_month')%3)+1))
~/anaconda3/lib/python3.6/site-packages/pyspark/sql/functions.py in add_months(start, months) 968 """ 969 sc = SparkContext._active_spark_context --> 970 return Column(sc._jvm.functions.add_months(_to_java_column(start), months)) 971 972
~/anaconda3/lib/python3.6/site-packages/py4j/java_gateway.py in __call__(self, *args) 1122 1123 def __call__(self, *args): -> 1124 args_command, temp_args = self._build_args(*args) 1125 1126 command = proto.CALL_COMMAND_NAME +\
~/anaconda3/lib/python3.6/site-packages/py4j/java_gateway.py in _build_args(self, *args) 1086 def _build_args(self, *args): 1087 if self.converters is not None and len(self.converters) > 0: -> 1088 (new_args, temp_args) = self._get_args(args) 1089 else: 1090 new_args = args
~/anaconda3/lib/python3.6/site-packages/py4j/java_gateway.py in _get_args(self, args) 1073 for converter in self.gateway_client.converters: 1074 if converter.can_convert(arg): -> 1075 temp_arg = converter.convert(arg, self.gateway_client) 1076 temp_args.append(temp_arg) 1077 new_args.append(temp_arg)
~/anaconda3/lib/python3.6/site-packages/py4j/java_collections.py in convert(self, object, gateway_client) 498 ArrayList = JavaClass("java.util.ArrayList", gateway_client) 499 java_list = ArrayList() --> 500 for element in object: 501 java_list.add(element) 502 return java_list
~/anaconda3/lib/python3.6/site-packages/pyspark/sql/column.py in __iter__(self) 248 249 def __iter__(self): --> 250 raise TypeError("Column is not iterable") 251 252 # string methods
TypeError: Column is not iterable