import pandas as pd
%%
import pandas as pd
%%
# Load the California housing test set into a DataFrame.
# The Windows path is written as a raw string so its backslashes are taken
# literally instead of being parsed as (invalid) escape sequences; the
# resulting path value is unchanged.
df = pd.read_csv(r'C:\Data\sample_data\california_housing_test.csv')
%%
# Bare expression: shows the loaded DataFrame when run as an interactive/notebook cell.
df
%%
Select 3 columns into a new dataframe
# Build df1 from df, keeping only the longitude, latitude and
# housing_median_age columns.
df1 = df[
    [
        'longitude',
        'latitude',
        'housing_median_age',
    ]
]
df1
%%
More than one condition in more than one column
Use one set of conditions or another. The OR operator.
Put this into a new dataframe df2
# Keep the rows of df1 whose housing_median_age is exactly 30 or exactly 40
# and whose latitude is at least 38.00. The latitude test is shared by both
# age alternatives, so it is applied once to the OR of the two age tests.
df2 = df1[
    (
        (df1['housing_median_age'] == 30)
        | (df1['housing_median_age'] == 40)
    )
    & (df1['latitude'] >= 38.00)
]
df2
%%
Group By 1
# Group df2 by housing_median_age and count the entries of that same column
# within each group.
(
    df2
    .groupby(by=['housing_median_age'])['housing_median_age']
    .count()
)
%%
Group By 2
# Group df2 by housing_median_age, take the size of each group, and turn the
# result back into a DataFrame whose size column is named 'counts'.
(
    df2
    .groupby(by=['housing_median_age'])
    .size()
    .reset_index(name='counts')
)