Κυριακή 21 Αυγούστου 2016

Exercise for week 2

My first program in Python


Or, directly, the code:

# -*- coding: utf-8 -*-
"""
Created on Sun Aug 21 00:47:23 2016

@author: User
"""

#importing libraries
import pandas
import numpy

def _print_distributions(data, column, title):
    """Print and return the frequency tables for one survey variable.

    Four tables are printed, each preceded by its label:

    * frequency             -- count per answer code, missing values included
    * percentage %          -- share of the non-missing answers, in percent
    * cumulative frequency  -- running total of the counts, in code order
    * cumulative percentage -- that running total as a percent of all rows

    Args:
        data: DataFrame holding the survey responses.
        column: Name of the column to tabulate.
        title: Question text printed above the tables.

    Returns:
        Tuple ``(counts, percentages, cumulative_counts,
        cumulative_percentages)`` of pandas Series.
    """
    print(title)

    print('frequency')
    counts = data[column].value_counts(sort=False, dropna=False)
    print(counts)

    # normalize=True yields 0-1 proportions; scale by 100 so the numbers
    # match the "percentage %" label and the cumulative percentage table.
    print('percentage %')
    percentages = data[column].value_counts(sort=False, normalize=True) * 100
    print(percentages)

    # groupby() orders the groups by answer code and leaves out missing
    # values; cumsum() turns the per-code counts into the running total that
    # the label announces.
    print('cumulative frequency')
    cumulative_counts = data.groupby(column).size().cumsum()
    print(cumulative_counts)

    # The denominator is every row, so the last value stays below 100 when
    # some answers are missing.
    print('cumulative percentage')
    cumulative_percentages = cumulative_counts * 100 / len(data)
    print(cumulative_percentages)

    return counts, percentages, cumulative_counts, cumulative_percentages


# Read the data into a DataFrame. low_memory=False parses the file in one
# pass instead of in chunks, which avoids mixed-type column inference.
mydata = pandas.read_csv('addhealth_pds.csv', low_memory=False)

# Upper-case all DataFrame column names straight after loading, so the
# upper-case variable codes used below match the header whatever its casing.
# A list (not a lazy map object) is assigned because older pandas versions
# reject an iterator here on Python 3.
mydata.columns = [name.upper() for name in mydata.columns]

# Display floats in fixed-point notation. Set before any table is printed so
# that it applies to all of the output below.
pandas.set_option('display.float_format', lambda x: '%f' % x)

# In an interactive session, entering `mydata` shows a summary of the data's
# structure. As a bare statement in a script it prints nothing, so it is not
# repeated here.

# The number of rows, which stands for the observations.
print('the population is:')
print(len(mydata))
# The number of columns, which stands for the variables.
print('The number of variables is:')
print(len(mydata.columns))

# Set the variables we will be working with to numeric. Values that cannot be
# parsed become NaN. pandas.to_numeric replaces Series.convert_objects, which
# no longer exists in current pandas releases.
for _column in ('H1WP13', 'H1WP18B', 'H1WP17H'):
    mydata[_column] = pandas.to_numeric(mydata[_column], errors='coerce')


# Frequency tables, each printed under its question title.
c1, p1, ct1, pt1 = _print_distributions(
    mydata, 'H1WP13', 'How close do you feel to your father')

c2, p2, ct2, pt2 = _print_distributions(
    mydata, 'H1WP18B',
    'How many have played a sport with their father in the past 4 weeks?')

c3, p3, ct3, pt3 = _print_distributions(
    mydata, 'H1WP17H',
    'How many have talked about school work or grades with their father '
    'in the past 4 weeks?')


# Subset the data to respondents whose H1WP13 answer is 5 ("very much",
# according to the original author's note -- confirm against the codebook).
sub1 = mydata[mydata['H1WP13'] == 5]

# Work on an independent copy of the subset.
sub2 = sub1.copy()

# Frequency distributions on the new sub2 data frame.
print('counts for very much')
c5 = sub2['H1WP13'].value_counts(sort=False)
print(c5)

# Scaled by 100 for the same reason as in _print_distributions.
print('percentages for very much')
p5 = sub2['H1WP13'].value_counts(sort=False, normalize=True) * 100
print(p5)

 

Δεν υπάρχουν σχόλια:

Δημοσίευση σχολίου