panda - Jupyter Notebook http://localhost:8888/notebooks/panda/panda.ipynb?#SHuffling-D
Imports
In [18]: import pandas as pd
Series = 1D
In [4]: series =pd.Series(["BMW","Toyota","Subaru"])
series
Out[4]: 0 BMW
1 Toyota
2 Subaru
dtype: object
In [5]: Colours = pd.Series(["Red","Blue","Black"])
Colours
Out[5]: 0 Red
1 Blue
2 Black
dtype: object
Data Frames = 2D
In [6]: df =pd.DataFrame({"Car make": series, "Colour":Colours})
df
Out[6]: Car make Colour
0 BMW Red
1 Toyota Blue
2 Subaru Black
Importing data from a CSV file
1 of 21 1/2/2023, 2:49
panda - Jupyter Notebook http://localhost:8888/notebooks/panda/panda.ipynb?#SHuffling-D
In [4]: carSales =pd.read_csv("car-sales.csv")
carSales
Out[4]: Make Colour Odometer (KM) Doors Price
0 Toyota White 150043 4 $4,000.00
1 Honda Red 87899 4 $5,000.00
2 Toyota Blue 32549 3 $7,000.00
3 BMW Black 11179 5 $22,000.00
4 Nissan White 213095 4 $3,500.00
5 Toyota Green 99213 4 $4,500.00
6 Honda Blue 45698 4 $7,500.00
7 Honda Blue 54738 4 $7,000.00
8 Toyota White 60000 4 $6,250.00
9 Nissan White 31600 4 $9,700.00
Exporting Data Frames
In [93]: carSales.to_csv("Exported-car-sales.csv",index =False)
Column Names
In [16]: # identifying the types of each column
carSales.dtypes
Out[16]: Make object
Colour object
Odometer (KM) int64
Doors int64
Price object
dtype: object
In [19]: ## Getting the column names (returned as a pandas Index, not a plain list)
car_columns = carSales.columns
car_columns
Out[19]: Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')
index
2 of 21 1/2/2023, 2:49
panda - Jupyter Notebook http://localhost:8888/notebooks/panda/panda.ipynb?#SHuffling-D
In [21]: carSales.index
Out[21]: RangeIndex(start=0, stop=10, step=1)
Describe
In [24]: # describe() gives us summary statistics of our DF (the numerical columns only)
In [23]: carSales.describe()
Out[23]: Odometer (KM) Doors
count 10.000000 10.000000
mean 78601.400000 4.000000
std 61983.471735 0.471405
min 11179.000000 3.000000
25% 35836.250000 4.000000
50% 57369.000000 4.000000
75% 96384.500000 4.000000
max 213095.000000 5.000000
info
In [25]: #General info of our DF
carSales.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Make 10 non-null object
1 Colour 10 non-null object
2 Odometer (KM) 10 non-null int64
3 Doors 10 non-null int64
4 Price 10 non-null object
dtypes: int64(2), object(3)
memory usage: 528.0+ bytes
mean
3 of 21 1/2/2023, 2:49
Imports
In [18]: import pandas as pd
Series = 1D
In [4]: series =pd.Series(["BMW","Toyota","Subaru"])
series
Out[4]: 0 BMW
1 Toyota
2 Subaru
dtype: object
In [5]: Colours = pd.Series(["Red","Blue","Black"])
Colours
Out[5]: 0 Red
1 Blue
2 Black
dtype: object
Data Frames = 2D
In [6]: df =pd.DataFrame({"Car make": series, "Colour":Colours})
df
Out[6]: Car make Colour
0 BMW Red
1 Toyota Blue
2 Subaru Black
Importing data from a CSV file
1 of 21 1/2/2023, 2:49
panda - Jupyter Notebook http://localhost:8888/notebooks/panda/panda.ipynb?#SHuffling-D
In [4]: carSales =pd.read_csv("car-sales.csv")
carSales
Out[4]: Make Colour Odometer (KM) Doors Price
0 Toyota White 150043 4 $4,000.00
1 Honda Red 87899 4 $5,000.00
2 Toyota Blue 32549 3 $7,000.00
3 BMW Black 11179 5 $22,000.00
4 Nissan White 213095 4 $3,500.00
5 Toyota Green 99213 4 $4,500.00
6 Honda Blue 45698 4 $7,500.00
7 Honda Blue 54738 4 $7,000.00
8 Toyota White 60000 4 $6,250.00
9 Nissan White 31600 4 $9,700.00
Exporting Data Frames
In [93]: carSales.to_csv("Exported-car-sales.csv",index =False)
Column Names
In [16]: # identifying the types of each column
carSales.dtypes
Out[16]: Make object
Colour object
Odometer (KM) int64
Doors int64
Price object
dtype: object
In [19]: ## Getting the column names (returned as a pandas Index, not a plain list)
car_columns = carSales.columns
car_columns
Out[19]: Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')
index
2 of 21 1/2/2023, 2:49
panda - Jupyter Notebook http://localhost:8888/notebooks/panda/panda.ipynb?#SHuffling-D
In [21]: carSales.index
Out[21]: RangeIndex(start=0, stop=10, step=1)
Describe
In [24]: # describe() gives us summary statistics of our DF (the numerical columns only)
In [23]: carSales.describe()
Out[23]: Odometer (KM) Doors
count 10.000000 10.000000
mean 78601.400000 4.000000
std 61983.471735 0.471405
min 11179.000000 3.000000
25% 35836.250000 4.000000
50% 57369.000000 4.000000
75% 96384.500000 4.000000
max 213095.000000 5.000000
info
In [25]: #General info of our DF
carSales.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Make 10 non-null object
1 Colour 10 non-null object
2 Odometer (KM) 10 non-null int64
3 Doors 10 non-null int64
4 Price 10 non-null object
dtypes: int64(2), object(3)
memory usage: 528.0+ bytes
mean
3 of 21 1/2/2023, 2:49