import pandas as pd


# import the CSV file
df = pd.read_csv("StudentsPerformance.csv")


### edTest(test_a) ###
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   gender                       1000 non-null   object
 1   race/ethnicity               1000 non-null   object
 2   parental level of education  1000 non-null   object
 3   lunch                        1000 non-null   object
 4   test preparation course      1000 non-null   object
 5   math score                   1000 non-null   int64 
 6   reading score                1000 non-null   int64 
 7   writing score                1000 non-null   int64 
dtypes: int64(3), object(5)
memory usage: 62.6+ KB


### edTest(test_b) ###
cols = df.columns
# Check at least the type is the right one
assert type(cols) == pd.core.indexes.base.Index


### edTest(test_c) ###
num_rows = df.shape[0]
num_cols = df.shape[1]


### edTest(test_d) ###
first_seven = df.head(7)


### edTest(test_e) ###
last_four = df.tail(4)


### edTest(test_f) ###
unique_parental_education_levels = df['parental level of education'].unique()
unique_parental_education_levels

array(["bachelor's degree", 'some college', "master's degree",
       "associate's degree", 'high school', 'some high school'],
      dtype=object)


# we can check if they are really 6
print('Are there 6 unique values?:', len(unique_parental_education_levels) == 6)
# we can display them here
print()
print(unique_parental_education_levels)

Are there 6 unique values?: True

["bachelor's degree" 'some college' "master's degree" "associate's degree"
 'high school' 'some high school']


### edTest(test_g) ###
scored_100_at_math = df[df["math score"]==100]
scored_100_at_math


# this shows that using the condition with 'loc' is the same as not using 'loc'
condition = df['math score'] == 100
all(df[condition] == df.loc[condition])

True


# the condition is a boolean series to be used with the whole dataframe
# the condition will be used to filter the dataframe to those rows where condition value is True
# we can see that condition has as many rows as the original dataframe
condition

0      False
1      False
2      False
3      False
4      False
       ...  
995    False
996    False
997    False
998    False
999    False
Name: math score, Length: 1000, dtype: bool


### edTest(test_h) ###
df.describe()


### edTest(test_i) ###
# not completed preparation course condition
cond1 = df['test preparation course'] != 'completed'
# writing score above the median
cond2 = df['writing score'] >  df['writing score'].median()
uncompleted_with_good_writing_score = df.loc[cond1 & cond2]
uncompleted_with_good_writing_score

	math score	reading score	writing score
count	1000.00000	1000.000000	1000.000000
mean	66.08900	69.169000	68.054000
std	15.16308	14.600192	15.195657
min	0.00000	17.000000	10.000000
25%	57.00000	59.000000	57.750000
50%	66.00000	70.000000	69.000000
75%	77.00000	79.000000	79.000000
max	100.00000	100.000000	100.000000

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score
0	female	group B	bachelor's degree	standard	none	72	72	74
2	female	group B	master's degree	standard	none	90	95	93
4	male	group C	some college	standard	none	76	78	75
5	female	group B	associate's degree	standard	none	71	83	78
12	female	group B	high school	standard	none	65	81	73
...	...	...	...	...	...	...	...	...
981	male	group D	some high school	standard	none	81	78	78
984	female	group C	some high school	standard	none	74	75	82
992	female	group D	associate's degree	free/reduced	none	55	76	76
993	female	group D	bachelor's degree	free/reduced	none	62	72	74
999	female	group D	some college	free/reduced	none	77	86	86

CS109a Introduction to Data Science

Lecture 2, Exercise 1: PANDAS Intro¶

PANDAS Basics 🐼¶

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score
149	male	group E	associate's degree	free/reduced	completed	100	100	93
451	female	group E	some college	standard	none	100	92	97
458	female	group E	bachelor's degree	standard	none	100	100	100
623	male	group A	some college	standard	completed	100	96	86
625	male	group D	some college	standard	completed	100	97	99
916	male	group E	bachelor's degree	standard	completed	100	100	100
962	female	group E	associate's degree	standard	none	100	100	100