DataCollector: Exercise for Week 2

My first program in Python

https://drive.google.com/open?id=0BwzT1QOuEXN6QzdPSHdqQWhuc2M

Or read the code directly:

# -*- coding: utf-8 -*-

"""

Created on Sun Aug 21 00:47:23 2016

@author: User

"""

#importing libraries

import pandas

import numpy

# Name of the AddHealth CSV extract; read relative to the working directory.
DATA_FILE = 'addhealth_pds.csv'

# Survey variables analysed below, each paired with the title printed above
# its set of tables.
QUESTIONS = [
    ('H1WP13', 'How close do you feel to your father'),
    ('H1WP18B',
     'How many have played a sport with their father in the past 4 weeks?'),
    ('H1WP17H',
     'How many have talked about school work or grades with their father '
     'in the past 4 weeks?'),
]


def prepare_data(data, numeric_columns):
    """Upper-case the column names and coerce the given columns to numeric.

    The frame is modified in place and also returned for convenience.

    Parameters:
        data: pandas DataFrame whose column labels are strings.
        numeric_columns: upper-case names of the columns to convert.

    Values that cannot be parsed as numbers become NaN.
    """
    # Upper-case first, so the lookups below work however the file spelled
    # its header.
    data.columns = data.columns.str.upper()
    for column in numeric_columns:
        # pandas.to_numeric replaces Series.convert_objects, which newer
        # pandas releases no longer provide.
        data[column] = pandas.to_numeric(data[column], errors='coerce')
    return data


def load_data(path=DATA_FILE):
    """Read the CSV at *path* into a DataFrame and prepare it for analysis."""
    data = pandas.read_csv(path, low_memory=False)
    return prepare_data(data, [column for column, _ in QUESTIONS])


def frequency_tables(data, column):
    """Build the four distribution tables for one variable.

    Returns a tuple of four pandas Series:
        counts: rows per value, with missing values (NaN) counted too.
        proportions: share of each value among the non-missing rows,
            expressed as a fraction between 0 and 1.
        cumulative_counts: running total of rows per value, in ascending
            order of the value; missing values are excluded.
        cumulative_percentages: the running total as a percentage of all
            rows in *data* (rows with missing values stay in the
            denominator).
    """
    counts = data[column].value_counts(sort=False, dropna=False)
    proportions = data[column].value_counts(sort=False, normalize=True)
    # groupby() orders the groups by value, so cumsum() accumulates from the
    # smallest response code to the largest.
    cumulative_counts = data.groupby(column).size().cumsum()
    cumulative_percentages = cumulative_counts * 100 / len(data)
    return counts, proportions, cumulative_counts, cumulative_percentages


def print_distribution(data, column, title):
    """Print *title* followed by the four labelled tables for *column*."""
    tables = frequency_tables(data, column)
    labels = ('frequency', 'percentage %',
              'cumulative frequency', 'cumulative percentage')
    print(title)
    for label, table in zip(labels, tables):
        print(label)
        print(table)


def main():
    """Load the survey data and print every summary table."""
    # Show floats in plain fixed-point notation in the printed tables.
    pandas.set_option('display.float_format', lambda x: '%f' % x)

    mydata = load_data()

    # The number of rows, which stands for the observations.
    print('the population is:')
    print(len(mydata))

    # The number of columns, which stands for the variables.
    print('The number of variables is:')
    print(len(mydata.columns))

    for column, title in QUESTIONS:
        print_distribution(mydata, column, title)

    # Subset the data to respondents who answered "very much" (code 5) and
    # work on an independent copy of that subset.
    sub2 = mydata[mydata['H1WP13'] == 5].copy()

    # Frequency distributions on the subset.
    print('counts for very much')
    print(sub2['H1WP13'].value_counts(sort=False))

    print('percentages for very much')
    print(sub2['H1WP13'].value_counts(sort=False, normalize=True))


if __name__ == '__main__':
    main()

DataCollector

Κυριακή 21 Αυγούστου 2016

Exercise for Week 2

My first program in Python

Δεν υπάρχουν σχόλια:

Δημοσίευση σχολίου