Difference between revisions of "Python Data Mining"
Jump to navigation
Jump to search
Line 37: | Line 37: | ||
</pre> | </pre> | ||
==[[#top|Back To Top]] - [[Python|Category]]== | ==[[#top|Back To Top]] - [[Python|Category]]== | ||
[[Category:Python]] |
Latest revision as of 16:21, 1 September 2020
Analyze CSV files using pandas
import pandas as pd

# Walk-through of basic DataFrame inspection and selection with pandas.
# NOTE: the bare expressions below (e.g. `df.shape`) only display a result in
# an interactive session or notebook; wrap them in print() when running this
# as a script.

# Load the two CSV files (presumably the survey answers and a schema file
# describing the columns -- confirm against the actual data set).
df = pd.read_csv('survey_results_public.csv')
schema_df = pd.read_csv('survey_results_schema.csv')

# Raise the display limits so up to 85 columns / 85 rows are shown
# before pandas truncates the output.
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)

df[['OpenSourcer', 'Employment']].tail(2)  # last two rows of two named columns
df.shape  # tuple of (number of rows, number of columns)
schema_df.tail(2)  # last two rows of the schema frame
df.columns  # the column names

# iloc selects by integer position; loc selects by label.
# In both, the first indexer picks rows and the optional second picks columns.
df.iloc[[0, 1]]  # first and second records, all columns
# Positional slices exclude the end: rows 0, 1 and 2 (three records),
# restricted to the columns at positions 5, 6 and 7.
df.iloc[0:3, [5, 6, 7]]

# Label slices include BOTH endpoints: rows labelled 0 through 10 (11 rows
# with the default index) and every column from 'Hobbyist' through 'Country'.
df.loc[0:10, 'Hobbyist':'Country']
# Same rows, but only the explicitly listed columns.
df.loc[0:10, ['Hobbyist', 'Student', 'Country']]

# Count how many times each distinct response (e.g. yes / no) occurs.
df['Hobbyist'].value_counts()