Skip to main content Link Menu Expand (external link) Document Search Copy Copied

Introduction to pandas & NumPy

pandas is a popular data analysis library for Python. You can think of pandas as a programmable, back-end counterpart to Excel that can be customized to handle raw data more easily. pandas works with one-dimensional (1D) and two-dimensional (2D) data.

1. NumPy

import numpy as np
np.array([1,3,5])
array([1, 3, 5])
np.array([[1,3,5], [2,4,6]])
array([[1, 3, 5],
       [2, 4, 6]])
np.zeros([3,6])
array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
a.shape
(3, 4)
a = np.array(['a', 'b', 3, 4])
print(a[1:3])
['b' '3']
b = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print(b)
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
print(b[0:2, 1:3])
[[2 3]
 [6 7]]
print(b[:, 1:3])
[[ 2  3]
 [ 6  7]
 [10 11]]
print(b[1:, :])
[[ 5  6  7  8]
 [ 9 10 11 12]]
print(b[:, :])
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

Let’s use NumPy for calculations. Arithmetic operators are applied element-wise, i.e. to every item of the array.

n1 = np.array([3, 6, 9])

print(n1)
print(n1 + 5)
print(n1 - 5)
print(n1 * 5)
print(n1 / 5)
print(n1 % 5)
[3 6 9]
[ 8 11 14]
[-2  1  4]
[15 30 45]
[0.6 1.2 1.8]
[3 1 4]
n2 = np.array([[1,2,3,4,5], [6,7,8,9,10]])
print(np.max(n2))
print(np.min(n2))
print(np.mean(n2))
print(np.median(n2))
print(np.var(n2))
print(np.std(n2))
10
1
5.5
5.5
8.25
2.8722813232690143

2. pandas

pandas has two data structures: Series for 1D data and DataFrame for 2D data.

import pandas as pd
# Series

s1 = pd.Series({'Boffin': 100, 'Bella': 90, 'John': 85})
s1.name = 'Literature'
print(s1)
Boffin    100
Bella      90
John       85
Name: Literature, dtype: int64
# DataFrame

pd.DataFrame({
    'age':[10, 42, 21]
})
   age
0   10
1   42
2   21
{
    'age':[10, 42, 21]
}
{'age': [10, 42, 21]}
{
  'age':[10, 42, 10],
  'name': ['Boffin', 'Bella', 'John'],
  'gender': ['male', 'female', 'male']
}
{'age': [10, 42, 10],
 'gender': ['male', 'female', 'male'],
 'name': ['Boffin', 'Bella', 'John']}
pd.DataFrame({
  'age':[10, 42, 21]
}).join(
pd.DataFrame({
  'name': ['Boffin', 'Bella', 'John']
})
)
   age    name
0   10  Boffin
1   42   Bella
2   21    John
pd.DataFrame({
  'age':[10, 42, 21]
  }).join(
      pd.DataFrame({
        'name': ['Boffin', 'Bella', 'John']
        })
    ).join(
      pd.DataFrame({
        'city': ['Bryan', 'Seoul', 'London']
      })
)
   age    name    city
0   10  Boffin   Bryan
1   42   Bella   Seoul
2   21    John  London
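# Note: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# On current versions, pd.concat([df1, df2]) produces the same row-wise result.
pd.DataFrame({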
  'age':[10, 42, 21]
}).append(
pd.DataFrame({
  'name': ['Boffin', 'Bella', 'John']
})
)
    age    name
0  10.0     NaN
1  42.0     NaN
2  21.0     NaN
0   NaN  Boffin
1   NaN   Bella
2   NaN    John
pd.DataFrame({
  'age':[10, 42, 21],
  'name': ['Boffin', 'Bella', 'John']
}).merge(
pd.DataFrame({
  'name': ['Boffin', 'Bella', 'John'],
  'city': ['Bryan', 'Seoul', 'London']
}), on=['name']
)
   age    name    city
0   10  Boffin   Bryan
1   42   Bella   Seoul
2   21    John  London