# Build a small two-column frame to experiment with.
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
print(df)
# Output:
#    A  B
# 0  1  4
# 1  2  5
# 2  3  6

# Assigning a list to a new label creates the column, one value per row.
df['C'] = [7, 8, 9]
print(df)
# Output:
#    A  B  C
# 0  1  4  7
# 1  2  5  8
# 2  3  6  9

# Assigning a scalar fills every row with it (this overwrites C).
df['C'] = 1
print(df)
# Output:
#    A  B  C
# 0  1  4  1
# 1  2  5  1
# 2  3  6  1

# A column can also be an expression of other columns.
df['C'] = df['A'] + df['B']
# Printing df at this point would show:
#    A  B  C
# 0  1  4  5
# 1  2  5  7
# 2  3  6  9

# Arithmetic between columns is applied row by row, here A raised to the power B.
df['C'] = df['A'] ** df['B']
print(df)
# Output:
#    A  B    C
# 0  1  4    1
# 1  2  5   32
# 2  3  6  729
运算是逐元素进行的,因此如果我们把这两列表示为列表:
# Plain-list stand-ins for two columns, used to illustrate element-wise math.
a = [1, 2, 3]
b = [4, 5, 6]
那么最后一个表达式中的列可以这样得到:
# Pair the two lists up position by position and raise each x to the power y.
c = [x ** y for x, y in zip(a, b)]
print(c)
# Output:
# [1, 32, 729]
# assign() returns a new frame that includes the extra column D, so the mean
# can be taken on the fly without adding D to df itself.
df_means = df.assign(D=[10, 20, 30]).mean()
print(df_means)
# Output:
# A     2.0
# B     5.0
# C     7.0
# D    20.0
# dtype: float64
# NOTE: the C entry is the mean of whatever C holds when this runs: 7.0 for
# C = A + B ([5, 7, 9]); it would be 254.0 for C = A ** B ([1, 32, 729]).
# Start again from a fresh two-column frame.
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})

# Add several columns in one assignment: np.square(df) squares every value,
# and its two result columns are stored under the new labels A2 and B2.
df[['A2', 'B2']] = np.square(df)
print(df)
# Output:
#    A  B  A2  B2
# 0  1  4   1  16
# 1  2  5   4  25
# 2  3  6   9  36

# assign() accepts several keyword arguments, each an expression over
# existing columns, and returns a new frame; df itself is left unchanged.
new_df = df.assign(A3=df.A * df.A2, B3=5 * df.B)
print(new_df)
# Output:
#    A  B  A2  B2  A3  B3
# 0  1  4   1  16   1  20
# 1  2  5   4  25   8  25
# 2  3  6   9  36  27  30