@@ -203,6 +203,12 @@ def _apply_padding(self) -> pl.DataFrame:
203203 Column .BALL_OWNING_TEAM_ID ,
204204 ]
205205
206+ user_defined_columns = [
207+ x
208+ for x in df .columns
209+ if x not in keep_columns + group_by_columns + empty_columns
210+ ]
211+
206212 counts = df .group_by (group_by_columns ).agg (
207213 pl .len ().alias ("count" ), * [pl .first (col ).alias (col ) for col in keep_columns ]
208214 )
@@ -233,15 +239,23 @@ def _apply_padding(self) -> pl.DataFrame:
233239 padding_df = pl .DataFrame (padding_rows )
234240
235241 schema = df .schema
242+ print (">>" , df .columns )
243+ print (">>" , padding_df .columns )
244+ print ("keep_columns" , keep_columns )
245+ print ("empty_columns" , empty_columns )
246+ print ("group_by_columns" , group_by_columns )
247+ print ("user_defined_columns" , user_defined_columns )
236248 padding_df = padding_df .with_columns (
237249 [
238250 pl .lit (0.0 if schema [col ] != pl .String else "None" )
239251 .cast (schema [col ])
240252 .alias (col )
241253 for col in empty_columns
242254 ]
255+ +
256+ # Set all user-defined columns to null
257+ [pl .lit (None ).cast (schema [col ]).alias (col ) for col in user_defined_columns ]
243258 )
244-
245259 padding_df = padding_df .select (df .columns )
246260
247261 result = pl .concat ([df , padding_df ], how = "vertical" )
0 commit comments