Python Data Mining

From rbachwiki
Jump to navigation Jump to search

Analyze CSV files using pandas


import pandas as pd


# Load the survey responses and the accompanying schema file from the
# current working directory. No index_col is given, so both frames get the
# default integer index 0..n-1.
# NOTE(review): the schema file presumably describes each response column —
# confirm by inspecting schema_df.
df = pd.read_csv('survey_results_public.csv')
schema_df = pd.read_csv('survey_results_schema.csv')

# Raise the display limits so wide/long frames are shown in full instead of
# being truncated with "...".
# NOTE(review): 85 presumably matches the number of columns in the survey
# file — confirm with df.shape.
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)

# NOTE: the bare expressions below only display a result when run in a
# notebook cell or REPL; in a plain script, wrap them in print() to see output.

# Select two columns by name (list of labels -> DataFrame), then show the
# last 2 rows.
df[['OpenSourcer', 'Employment']].tail(2)

df.shape  # (number of rows, number of columns) as a tuple

# Last 2 rows of the schema frame.
schema_df.tail(2)

df.columns  # Index holding the column names

# iloc selects by integer position.
df.iloc[[0,1]]  # rows at positions 0 and 1, with all columns
# The inner list [0, 1] holds the row positions; an optional second argument
# after a comma, e.g. df.iloc[[0, 1], [5, 6]], would select column positions.


df.iloc[0:3,[5,6,7]]  # rows at positions 0, 1 and 2; columns at positions 5, 6 and 7
# The slice 0:3 excludes its end position (like a normal Python slice), so it
# yields three rows; the list [5, 6, 7] picks the columns by position.
# iloc uses integer index locations, never labels.

# loc selects by label, and label slices INCLUDE both endpoints:
# rows labelled 0 through 10 (11 rows with the default integer index) and
# every column from 'Hobbyist' through 'Country'.
df.loc[0:10, 'Hobbyist':'Country']

# loc with a list of labels: same rows (0 through 10 inclusive), but only the
# three named columns, in the order listed.
df.loc[0:10, ['Hobbyist','Student','Country']]

# Count how many times each distinct value occurs in the 'Hobbyist' column
# (presumably "Yes"/"No" responses — verify against the data).
df['Hobbyist'].value_counts()