概述
# Minimal example: a Series whose cells hold lists of different lengths.
s = pd.Series([[0, 1], [2, 3, 4]], index=[2011, 2012])
# Display of ``s``:
# 2011       [0, 1]
# 2012    [2, 3, 4]
# dtype: object

# apply(pd.Series) expands every list into its own row of a DataFrame; the
# shorter list is padded with NaN, which is why the values become floats.
# stack() then folds the columns into an inner index level holding each
# element's position. The explicit dropna() removes the NaN padding even if
# the stack() implementation in use keeps it.
stacked = s.apply(pd.Series).stack().dropna()
# Display of ``stacked``:
# 2011  0    0.0
#       1    1.0
# 2012  0    2.0
#       1    3.0
#       2    4.0
# dtype: float64
# Sample data: two list-valued columns (A and B) plus the identifying
# columns ID and Year. Lists in the same row have the same length.
a = list(range(14))
b = list(range(20, 34))
df = pd.DataFrame({
    'ID': [11111, 11111, 11112, 11112],
    'Year': [2011, 2012, 2011, 2012],
    'A': [a[:3], a[3:7], a[7:10], a[10:14]],
    'B': [b[:3], b[3:7], b[7:10], b[10:14]],
})
# Display of ``df`` as shown in the original transcript
# (NOTE: the column order of the display may differ on other pandas versions):
#                   A                 B     ID  Year
# 0         [0, 1, 2]      [20, 21, 22]  11111  2011
# 1      [3, 4, 5, 6]  [23, 24, 25, 26]  11111  2012
# 2         [7, 8, 9]      [27, 28, 29]  11112  2011
# 3  [10, 11, 12, 13]  [30, 31, 32, 33]  11112  2012
# Set an index (each remaining column will inherit it), so every unnested
# value stays labelled with its (ID, Year) pair.
df2 = df.set_index(['ID', 'Year'])

# The trick: for every list-valued column, expand the lists into a wide
# frame with apply(pd.Series), then stack() the columns into a third index
# level that holds each element's position inside its list. dropna()
# removes the NaN padding of shorter lists even if the stack()
# implementation in use keeps it.
unnested_lst = [
    df2[col].apply(pd.Series).stack().dropna()
    for col in df2.columns
]

# Put the unnested columns side by side again, aligned on
# (ID, Year, position) and labelled with the original column names.
result = pd.concat(unnested_lst, axis=1, keys=df2.columns)
# Display of ``result``:
#                    A     B
# ID    Year
# 11111 2011 0     0.0  20.0
#            1     1.0  21.0
#            2     2.0  22.0
#       2012 0     3.0  23.0
#            1     4.0  24.0
#            2     5.0  25.0
#            3     6.0  26.0
# 11112 2011 0     7.0  27.0
#            1     8.0  28.0
#            2     9.0  29.0
#       2012 0    10.0  30.0
#            1    11.0  31.0
#            2    12.0  32.0
#            3    13.0  33.0
# Replace the (ID, Year, position) index of ``result`` with (ID, Date),
# where Date is January 1st of Year plus ``position`` days.

# DatetimeIndex: parse the Year level as year-only strings. The explicit
# format avoids relying on date-format inference.
years = pd.to_datetime(
    result.index.get_level_values(1).astype(str), format='%Y'
)

# TimedeltaIndex: the position of each element, interpreted as days.
days = pd.to_timedelta(result.index.get_level_values(2), unit='D')
# If the above line doesn't work (a bug in pandas), try this:
# days = result.index.get_level_values(2).astype('timedelta64[D]')

# The sum is again a DatetimeIndex.
dates = years + days
dates.name = 'Date'

new_index = pd.MultiIndex.from_arrays(
    [result.index.get_level_values(0), dates]
)
result.index = new_index
# Display of ``result``:
#                      A     B
# ID    Date
# 11111 2011-01-01   0.0  20.0
#       2011-01-02   1.0  21.0
#       2011-01-03   2.0  22.0
#       2012-01-01   3.0  23.0
#       2012-01-02   4.0  24.0
#       2012-01-03   5.0  25.0
#       2012-01-04   6.0  26.0
# 11112 2011-01-01   7.0  27.0
#       2011-01-02   8.0  28.0
#       2011-01-03   9.0  29.0
#       2012-01-01  10.0  30.0
#       2012-01-02  11.0  31.0
#       2012-01-03  12.0  32.0
#       2012-01-04  13.0  33.0
总结
以上是编程之家为你收集整理的“python – 从Pandas Dataframe单元格中拆分多行的嵌套数组值”的全部内容,希望文章能够帮你解决在“python – 从Pandas Dataframe单元格中拆分多行的嵌套数组值”中遇到的程序开发问题。
如果您也喜欢它,动动您的小指点个赞吧