# My first program in Python
# (the code is given directly below)
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 21 00:47:23 2016
@author: User
"""
#importing libraries
import pandas
import numpy
# CSV file holding the survey data; read relative to the working directory.
DATA_FILE = 'addhealth_pds.csv'


def coerce_numeric(frame, columns):
    """Convert the given columns of *frame* to a numeric dtype, in place.

    Values that cannot be parsed as numbers (for example blank strings)
    become NaN.  ``Series.convert_objects(convert_numeric=True)`` used to
    do this but was removed in pandas 1.0; ``pandas.to_numeric`` with
    ``errors='coerce'`` is the replacement.

    Returns the same *frame* so calls can be chained.
    """
    for column in columns:
        frame[column] = pandas.to_numeric(frame[column], errors='coerce')
    return frame


def frequency_tables(frame, column):
    """Build the four distribution tables for one column of *frame*.

    Returns the tuple ``(counts, percentages, cumulative_counts,
    cumulative_percentages)`` where

    * ``counts`` is the number of rows per value, missing values included;
    * ``percentages`` is the share of each value among the non-missing
      rows, as a fraction between 0 and 1;
    * ``cumulative_counts`` is the running total of rows per value, with
      the values in ascending order and missing values left out;
    * ``cumulative_percentages`` is that running total as a percentage of
      all rows of *frame*.
    """
    values = frame[column]
    counts = values.value_counts(sort=False, dropna=False)
    percentages = values.value_counts(sort=False, normalize=True)
    # groupby() orders the groups by value and drops missing keys; cumsum()
    # is what makes these tables cumulative, as their headings promise.
    cumulative_counts = frame.groupby(column).size().cumsum()
    # The denominator is every row, so when the column has missing values
    # the last cumulative percentage stays below 100.
    cumulative_percentages = cumulative_counts * 100 / len(frame)
    return counts, percentages, cumulative_counts, cumulative_percentages


def report_variable(frame, column, question):
    """Print *question* followed by the four tables for *column*.

    Returns the same tuple as ``frequency_tables`` so the caller can keep
    the tables for further use.
    """
    tables = frequency_tables(frame, column)
    headings = ('frequency', 'percentage %',
                'cumulative frequency', 'cumulative percentage')
    print(question)
    for heading, table in zip(headings, tables):
        print(heading)
        print(table)
    return tables


# The statements below stay at module level (not inside a function) so that
# mydata, c1, p1, ... remain available after running the script in an
# interactive session, exactly as before.
if __name__ == '__main__':
    # This reads the data into a handy dataframe format.
    mydata = pandas.read_csv(DATA_FILE, low_memory=False)

    # Upper-case all DataFrame column names; done right after loading so
    # every lookup below sees the final names.
    mydata.columns = [name.upper() for name in mydata.columns]

    # Show floats in plain fixed-point notation; set before anything is
    # printed so that it applies to every table.
    pandas.set_option('display.float_format', lambda value: '%f' % value)

    # The number of rows, which stands for the observations.
    print('the population is:')
    print(len(mydata))
    # The number of columns, which stands for the variables.
    print('The number of variables is:')
    print(len(mydata.columns))

    # Set every variable we work with to numeric, not only the first one.
    coerce_numeric(mydata, ['H1WP13', 'H1WP18B', 'H1WP17H'])

    c1, p1, ct1, pt1 = report_variable(
        mydata, 'H1WP13',
        'How close do you feel to your father')

    c2, p2, ct2, pt2 = report_variable(
        mydata, 'H1WP18B',
        'How many have played a sport with their father in the past 4 weeks?')

    c3, p3, ct3, pt3 = report_variable(
        mydata, 'H1WP17H',
        'How many have talked about school work or grades with their father in the past 4 weeks?')

    # Subset the data to the respondents who answered "very much" (code 5).
    sub1 = mydata[mydata['H1WP13'] == 5]
    # Work on a copy so that later changes never touch mydata.
    sub2 = sub1.copy()

    # Frequency distributions on the new sub2 data frame.
    print('counts for very much')
    c5 = sub2['H1WP13'].value_counts(sort=False)
    print(c5)
    print('percentages for very much')
    p5 = sub2['H1WP13'].value_counts(sort=False, normalize=True)
    print(p5)
# (Blog page footer) No comments:
# (Blog page footer) Post a comment