! pip install pandas

Requirement already satisfied: pandas in c:\python39\lib\site-packages (1.5.3)
Requirement already satisfied: python-dateutil>=2.8.1 in c:\users\huzai\appdata\roaming\python\python39\site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\python39\lib\site-packages (from pandas) (2022.7.1)
Requirement already satisfied: numpy>=1.20.3 in c:\python39\lib\site-packages (from pandas) (1.24.1)
Requirement already satisfied: six>=1.5 in c:\users\huzai\appdata\roaming\python\python39\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)


# Alternative install command for conda-based environments. On this machine it
# fails because the `conda` command is not recognized (see the output below).
! conda install pandas

'conda' is not recognized as an internal or external command,
operable program or batch file.


import pandas as pd


import pandas as pd

# Build a small 2x2 frame column by column instead of row by row.
df = pd.DataFrame({'a': [1, 3], 'b': [2, 4]})

df


import pandas as pd
import seaborn as sns


# Fetch the iris example dataset bundled with seaborn and display it.
df = sns.load_dataset(name='iris')

df


# First five rows (the default row count).
df.head(n=5)


# First ten rows.
df.head(n=10)


# Last five rows (the default row count).
df.tail(n=5)


# Last ten rows.
df.tail(n=10)


# Column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


# Summary statistics for the numeric columns, with the default quartiles spelled out.
df.describe(percentiles=[0.25, 0.5, 0.75])


# (number of rows, number of columns)
df.shape

(150, 5)


# Draw a random 10% of the rows and report the size of the reduced frame.
tenth_of_rows = df.sample(frac=0.1)
tenth_of_rows.shape

(15, 5)


# Column labels of the frame, returned as an Index.
df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')


# Data type of every column, returned as a Series indexed by column name.
df.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
dtype: object


# Keep only the numeric columns.
numeric_part = df.select_dtypes(include='number')
numeric_part.head()


# Keep only the object (string) columns.
object_part = df.select_dtypes(include='object')
object_part.head()


# Several dtypes can be requested at once.
df.select_dtypes(include=['object', 'number']).head()


# Everything except the object columns.
non_object_part = df.select_dtypes(exclude='object')
non_object_part.head()


# Row labels of the frame.
df.index

RangeIndex(start=0, stop=150, step=1)


# Swap rows and columns of the first five rows.
df.head().transpose()


df.head(n=5)


# Returns a renamed copy; `df` itself is left unchanged.
df.rename({'sepal_length': 'sepal_length_A'}, axis='columns')


# Replace underscores in every column label with spaces (literal replacement).
df.columns = df.columns.str.replace('_', ' ', regex=False)
df.head(n=5)


# Prepend a prefix to every column label
df = df.add_prefix(prefix='abu_')
df.head(n=5)


# Append a suffix to every column label
df = df.add_suffix(suffix='_420')
df.head(n=5)


# Reload iris to discard the renamed column labels from the previous cells.
df = sns.load_dataset(name='iris')


# dtype of a single column.
df['sepal_length'].dtype

dtype('float64')


# astype() returns a converted copy. The result is not assigned here, so the
# column displayed by the next line is still float64 (see the output below).
df['sepal_length'].astype(int)
df['sepal_length']

0      5.1
1      4.9
2      4.7
3      4.6
4      5.0
      ... 
145    6.7
146    6.3
147    6.5
148    6.2
149    5.9
Name: sepal_length, Length: 150, dtype: float64


# Two columns of digit strings: '1'..'7' and '11'..'17'.
df1 = pd.DataFrame({
    'col_A': [str(n) for n in range(1, 8)],
    'col_B': [str(n) for n in range(11, 18)],
})
df1.dtypes

col_A    object
col_B    object
dtype: object


# Convert both string columns to int64; returns a new frame.
df1.astype(dict.fromkeys(['col_A', 'col_B'], 'int64'))


# Returns a copy without the sepal_length column.
df.drop('sepal_length', axis='columns')


# Rows ordered by sepal_width, largest first; show the top five.
widest_first = df.sort_values('sepal_width', ascending=False)
widest_first.head()


# Non-null counts per (sepal_length, species) combination.
df.groupby(['sepal_length', 'species']).count()


# The grouping examples from here on use Titanic columns ('who', 'sex',
# 'pclass', ...). `df` still holds iris at this point, which has no 'who'
# column, so load the Titanic dataset before grouping.
df = sns.load_dataset('titanic')
# Non-null count of every column per passenger category.
df.groupby('who').count()


# Number of rows per passenger category.
df.groupby('who').size()

who
child     83
man      537
woman    271
dtype: int64


# Group sizes as a regular frame with an explicit 'counts' column.
df.groupby('who').size().reset_index(name='counts')


# Sum only the numeric/boolean columns. Passing numeric_only explicitly avoids
# the FutureWarning about the changing default and keeps the result stable
# across pandas versions.
df.groupby('who').sum(numeric_only=True)

C:\Users\huzai\AppData\Local\Temp\ipykernel_12200\2979829937.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  df.groupby('who').sum()


# Non-null counts for every (sex, pclass, who) combination.
group_keys = ['sex', 'pclass', 'who']
df.groupby(group_keys).count()


# Rows 'min' through 'max' of the summary, restricted to three columns.
summary = df.describe()
summary.loc['min':'max', ['survived', 'age', 'fare']]


# Two frames that share the 'key' column; keys 2 and 3 appear in both.
df1 = pd.DataFrame({'key': [1, 2, 3], 'A': [4, 5, 6]})
df2 = pd.DataFrame({'key': [2, 3, 4], 'B': [7, 8, 9]})
# Inner join on the shared column, spelled out explicitly.
df1.merge(df2, how='inner', on='key')


# Two frames with identical columns A-D; cell values are '<column><row number>'.
column_names = ['A', 'B', 'C', 'D']
df1 = pd.DataFrame({c: [f'{c}{i}' for i in range(0, 4)] for c in column_names})

df2 = pd.DataFrame({c: [f'{c}{i}' for i in range(4, 8)] for c in column_names})

# Stack the frames vertically; the original row labels (0-3 twice) are kept.
pd.concat([df1, df2], axis=0)


# Write the frame, including its row index, to an Excel workbook in the working directory.
df.to_excel('phool.xlsx', index=True)


# Rows in reverse order (a step of -1 over the row axis).
df.iloc[::-1].head()


# Columns in reverse order (a step of -1 over the column axis).
df.iloc[:, ::-1].head()


# Build a DataFrame from the text currently on the system clipboard.
# NOTE(review): this rebinds `df`, so its contents afterwards depend on
# whatever was copied before the cell ran.
df=pd.read_clipboard()
df


import pandas as pd
import seaborn as sns

# Load the Titanic dataset. The following cells all operate on `df` (the
# recorded shapes of 446/445 x 15 are Titanic splits), so bind it to `df` as
# well; `df2` is kept for backward compatibility.
df2 = sns.load_dataset('titanic')
df = df2.copy()


from  random import random
# Random half of the rows; the fixed seed makes the split reproducible.
kashti_1 = df.sample(frac=0.5, random_state=1)
kashti_1.shape

(446, 15)


# The complementary half: every row whose label was not drawn into kashti_1.
kashti_2 = df.drop(index=kashti_1.index)
kashti_2.shape

(445, 15)


# First five rows of the sampled half.
kashti_1.head(n=5)


# First five rows of the remaining half.
kashti_2.head(n=5)


# Join the two halves back together. DataFrame.append is deprecated and was
# removed in pandas 2.0; pd.concat is the supported equivalent and yields the
# same row order and index labels.
df4 = pd.concat([kashti_1, kashti_2])
df4.shape

C:\Users\huzai\AppData\Local\Temp\ipykernel_12200\1148480633.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  df4=kashti_1.append(kashti_2)

(891, 15)


df.head(n=5)


# Distinct values of the 'sex' column, in order of first appearance.
df['sex'].unique()

array(['male', 'female'], dtype=object)


# Boolean mask selecting the female passengers.
is_female = df['sex'] == 'female'
df[is_female].head()


# Women who embarked at Southampton or Queenstown; count() reports the
# non-null entries per column.
from_s_or_q = (df['embark_town'] == 'Southampton') | (df['embark_town'] == 'Queenstown')
df[from_s_or_q & (df['sex'] == 'female')].count()

survived       239
pclass         239
sex            239
age            198
sibsp          239
parch          239
fare           239
embarked       239
class          239
who            239
adult_male     239
deck            58
embark_town    239
alive          239
alone          239
dtype: int64


# Membership test against a list of allowed values.
embarked_at_queenstown = df['embark_town'].isin(['Queenstown'])
df[embarked_at_queenstown].head()


# Passengers older than 30.
df.loc[df['age'] > 30].shape

(305, 15)


# Rows for passengers categorised as children.
df.loc[df['who'] == 'child'].shape

(83, 15)


# Children who did not survive.
is_child = df['who'] == 'child'
df.loc[(df['alive'] == 'no') & is_child].shape

(34, 15)


# First ten children who survived.
survived_children = (df['alive'] == 'yes') & (df['who'] == 'child')
df[survived_children].head(n=10)


# Number of passengers per embarkation town, most frequent first.
df['embark_town'].value_counts()

Southampton    644
Cherbourg      168
Queenstown      77
Name: embark_town, dtype: int64


# The five most frequent ages.
age_counts = df['age'].value_counts()
age_counts.nlargest(n=5)

24.0    30
22.0    27
18.0    26
19.0    25
28.0    25
Name: age, dtype: int64


# Labels of the (up to) five most frequent passenger categories.
counts = df['who'].value_counts()
counts.nlargest(n=5).index

Index(['man', 'woman', 'child'], dtype='object')


# The same ranking, this time with the frequencies.
counts = df['who'].value_counts()
counts.nlargest(n=5)

man      537
woman    271
child     83
Name: who, dtype: int64


# Sample frame with a full name and a 'City , Country' location per row.
df5 = pd.DataFrame({
    'name': ['Hunzala Tahir', 'Huraira Tariq', 'Huzaifa Tahir', 'Ibraheem Tahir'],
    'location': ['Fasilabad , Pakistan', 'Islamabad , Pakistan', 'Lahore , Pakistan', 'Gojra , Pakistan'],
})
df5


# Split the name on the single space into first and last name columns.
df5[['f_name', 'l_name']] = df5['name'].str.split(' ', expand=True)
df5


# Split the location on the comma, then strip the spaces that surround it;
# otherwise the values would be stored as 'Fasilabad ' and ' Pakistan'.
location_parts = df5['location'].str.split(',', expand=True)
df5[['city', 'country']] = location_parts.apply(lambda part: part.str.strip())
df5


# Keep only the derived columns.
df5 = df5[['f_name', 'l_name', 'city', 'country']]
df5


# Translate the two 'sex' labels into integers through an explicit lookup table.
sex_codes = {'male': 0, 'female': 1}
df['sex_num'] = df['sex'].map(sex_codes)
df.head(n=5)


# factorize() returns (codes, uniques); only the integer codes are stored.
embarked_codes, embarked_labels = df['embarked'].factorize()
df['embarked_num'] = embarked_codes
df.head(n=15)


# Wide table: one row per zip code, one distance column per location type.
distance_rows = [
    ['12345', 100, 200, 300],
    ['34567', 400, 500, 600],
    ['56789', 700, 800, 900],
]
fasla = pd.DataFrame(data=distance_rows, columns=['zip', 'factroy', 'warehouse', 'retail'])
fasla.head()


# Long table: one row per (zip, location) pair with its distance.
fasla_long = pd.melt(fasla, id_vars='zip', var_name='location', value_name='distance')
fasla_long.head()


# Five rows: A counts 1-5, B counts 10-50 in steps of ten.
df4 = pd.DataFrame({'A': list(range(1, 6)),
                    'B': list(range(10, 60, 10))})

# Label-based row slice.
df4.loc[0:2]


# Every row of column 'A', selected by label; the result is a Series.
df4.loc[:, 'A']

0    1
1    2
2    3
3    4
4    5
Name: A, dtype: int64


# Label slices with .loc include the end label, so rows 0, 1 and 2 of 'A' are returned.
df4.loc[0:2, 'A']

0    1
1    2
2    3
Name: A, dtype: int64


# The row at integer position 2, returned as a Series indexed by column name.
df4.iloc[2]

A     3
B    30
Name: 2, dtype: int64


# Positional slices exclude the stop value: rows at positions 2-4, columns at positions 0-1.
df4.iloc[2:5, 0:2]


# Same shape of data as the loc/iloc example: B is always ten times A.
a_values = list(range(1, 6))
df11 = pd.DataFrame({'A': a_values,
                     'B': [a * 10 for a in a_values]})


# Filter rows with a boolean expression written as a string.
df11.query("A > 2 and B < 40")

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa
...	...	...	...	...	...
145	6.7	3.0	5.2	2.3	virginica
146	6.3	2.5	5.0	1.9	virginica
147	6.5	3.0	5.2	2.0	virginica
148	6.2	3.4	5.4	2.3	virginica
149	5.9	3.0	5.1	1.8	virginica

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa
5	5.4	3.9	1.7	0.4	setosa
6	4.6	3.4	1.4	0.3	setosa
7	5.0	3.4	1.5	0.2	setosa
8	4.4	2.9	1.4	0.2	setosa
9	4.9	3.1	1.5	0.1	setosa

	sepal_length	sepal_width	petal_length	petal_width	species
140	6.7	3.1	5.6	2.4	virginica
141	6.9	3.1	5.1	2.3	virginica
142	5.8	2.7	5.1	1.9	virginica
143	6.8	3.2	5.9	2.3	virginica
144	6.7	3.3	5.7	2.5	virginica
145	6.7	3.0	5.2	2.3	virginica
146	6.3	2.5	5.0	1.9	virginica
147	6.5	3.0	5.2	2.0	virginica
148	6.2	3.4	5.4	2.3	virginica
149	5.9	3.0	5.1	1.8	virginica

	sepal_length	sepal_width	petal_length	petal_width
count	150.000000	150.000000	150.000000	150.000000
mean	5.843333	3.057333	3.758000	1.199333
std	0.828066	0.435866	1.765298	0.762238
min	4.300000	2.000000	1.000000	0.100000
25%	5.100000	2.800000	1.600000	0.300000
50%	5.800000	3.000000	4.350000	1.300000
75%	6.400000	3.300000	5.100000	1.800000
max	7.900000	4.400000	6.900000	2.500000

	sepal_length	sepal_width	petal_length	petal_width
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal_length	sepal_width	petal_length	petal_width
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	survived	pclass	sex	age	sibsp	parch	fare	embarked	class	adult_male	deck	embark_town	alive	alone
who
child	83	83	83	83	83	83	83	83	83	83	13	83	83	83
man	537	537	537	413	537	537	537	537	537	537	99	537	537	537
woman	271	271	271	218	271	271	271	269	271	271	91	269	271	271

	survived	pclass	age	sibsp	parch	fare	adult_male	alone
who
child	49	218	528.67	144	105	2721.2210	0	6
man	88	1274	13700.50	159	82	13352.0656	537	410
woman	205	565	6976.00	163	153	12620.6627	0	121

			survived	age	sibsp	parch	fare	embarked	class	adult_male	deck	embark_town	alive	alone
sex	pclass	who
female	1	child	3	3	3	3	3	3	3	3	3	3	3	3
	1	woman	91	82	91	91	91	89	91	91	78	89	91	91
	2	child	10	10	10	10	10	10	10	10	1	10	10	10
	2	woman	66	64	66	66	66	66	66	66	9	66	66	66
	3	child	30	30	30	30	30	30	30	30	2	30	30	30
	3	woman	114	72	114	114	114	114	114	114	4	114	114	114
male	1	child	3	3	3	3	3	3	3	3	3	3	3	3
	1	man	119	98	119	119	119	119	119	119	91	119	119	119
	2	child	9	9	9	9	9	9	9	9	3	9	9	9
	2	man	99	90	99	99	99	99	99	99	3	99	99	99
	3	child	28	28	28	28	28	28	28	28	1	28	28	28
	3	man	319	225	319	319	319	319	319	319	5	319	319	319

	survived	age	fare
min	0.0	0.420	0.0000
25%	0.0	20.125	7.9104
50%	0.0	28.000	14.4542
75%	1.0	38.000	31.0000
max	1.0	80.000	512.3292

	A	B	C	D
0	A0	B0	C0	D0
1	A1	B1	C1	D1
2	A2	B2	C2	D2
3	A3	B3	C3	D3
0	A4	B4	C4	D4
1	A5	B5	C5	D5
2	A6	B6	C6	D6
3	A7	B7	C7	D7

	survived	pclass	sex	age	parch	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
862	1	1	female	48.0	0	25.9292	S	First	woman	False	D	Southampton	yes	True
223	0	3	male	NaN	0	7.8958	S	Third	man	True	NaN	Southampton	no	True
84	1	2	female	17.0	0	10.5000	S	Second	woman	False	NaN	Southampton	yes	True
680	0	3	female	NaN	0	8.1375	Q	Third	woman	False	NaN	Queenstown	no	True
535	1	2	female	7.0	2	26.2500	S	Second	child	False	NaN	Southampton	yes	False

	survived	pclass	sex	age	sibsp	parch	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
1	1	1	female	38.0	1	0	71.2833	C	First	woman	False	C	Cherbourg	yes	False
7	0	3	male	2.0	3	1	21.0750	S	Third	child	False	NaN	Southampton	no	False
10	1	3	female	4.0	1	1	16.7000	S	Third	child	False	G	Southampton	yes	False
15	1	2	female	55.0	0	0	16.0000	S	Second	woman	False	NaN	Southampton	yes	True
18	0	3	female	31.0	1	0	18.0000	S	Third	woman	False	NaN	Southampton	no	False

	survived	pclass	sex	age	sibsp	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True

	key	A	B
0	2	5	7
1	3	6	8

	name	location
0	Hunzala Tahir	Fasilabad , Pakistan
1	Huraira Tariq	Islamabad , Pakistan
2	Huzaifa Tahir	Lahore , Pakistan
3	Ibraheem Tahir	Gojra , Pakistan

	name	location	f_name	l_name
0	Hunzala Tahir	Fasilabad , Pakistan	Hunzala	Tahir
1	Huraira Tariq	Islamabad , Pakistan	Huraira	Tariq
2	Huzaifa Tahir	Lahore , Pakistan	Huzaifa	Tahir
3	Ibraheem Tahir	Gojra , Pakistan	Ibraheem	Tahir

	name	location	f_name	l_name	city	country
0	Hunzala Tahir	Fasilabad , Pakistan	Hunzala	Tahir	Fasilabad	Pakistan
1	Huraira Tariq	Islamabad , Pakistan	Huraira	Tariq	Islamabad	Pakistan
2	Huzaifa Tahir	Lahore , Pakistan	Huzaifa	Tahir	Lahore	Pakistan
3	Ibraheem Tahir	Gojra , Pakistan	Ibraheem	Tahir	Gojra	Pakistan

	sepal_length	sepal_width	petal_length	petal_width
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal_length	sepal_width	petal_length	petal_width
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	A	B	C	D
0	A0	B0	C0	D0
1	A1	B1	C1	D1
2	A2	B2	C2	D2
3	A3	B3	C3	D3
0	A4	B4	C4	D4
1	A5	B5	C5	D5
2	A6	B6	C6	D6
3	A7	B7	C7	D7

	survived	pclass	sex	age	sibsp	parch	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
5	0	3	male	NaN	0	0	8.4583	Q	Third	man	True	NaN	Queenstown	no	True
16	0	3	male	2.0	4	1	29.1250	Q	Third	child	False	NaN	Queenstown	no	False
22	1	3	female	15.0	0	0	8.0292	Q	Third	child	False	NaN	Queenstown	yes	True
28	1	3	female	NaN	0	0	7.8792	Q	Third	woman	False	NaN	Queenstown	yes	True
32	1	3	female	NaN	0	0	7.7500	Q	Third	woman	False	NaN	Queenstown	yes	True

	survived	pclass	sex	age	sibsp	parch	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
9	1	2	female	14.00	1	0	30.0708	C	Second	child	False	NaN	Cherbourg	yes	False
10	1	3	female	4.00	1	1	16.7000	S	Third	child	False	G	Southampton	yes	False
22	1	3	female	15.00	0	0	8.0292	Q	Third	child	False	NaN	Queenstown	yes	True
39	1	3	female	14.00	1	0	11.2417	C	Third	child	False	NaN	Cherbourg	yes	False
43	1	2	female	3.00	1	2	41.5792	C	Second	child	False	NaN	Cherbourg	yes	False
58	1	2	female	5.00	1	2	27.7500	S	Second	child	False	NaN	Southampton	yes	False
78	1	2	male	0.83	0	2	29.0000	S	Second	child	False	NaN	Southampton	yes	False
125	1	3	male	12.00	1	0	11.2417	C	Third	child	False	NaN	Cherbourg	yes	False
165	1	3	male	9.00	0	2	20.5250	S	Third	child	False	NaN	Southampton	yes	False
172	1	3	female	1.00	1	1	11.1333	S	Third	child	False	NaN	Southampton	yes	False

	zip	location	distance
0	12345	factroy	100
1	34567	factroy	400
2	56789	factroy	700
3	12345	warehouse	200
4	34567	warehouse	500

Pandas Cheat Sheet

Author: Huzaifa Tahir Date: January 31, 2023

Pandas¶

What is Pandas?¶

Why Pandas?¶

How to install Pandas?¶

How to import Pandas?¶

What is DataFrame?¶

What is Series?¶

How to create a DataFrame?¶

Use Seaborn to load the dataset¶

Load the dataset¶

Important functions and methods¶

1- head()¶

2- tail()¶

3- info()¶

4- describe()¶

5- shape¶

Reduce the DataFrame size¶

6- columns¶

7- dtypes¶

8- index¶

9- T (Transpose)¶

10- rename()¶

11- astype()¶

Convert String to number¶

12- drop()¶

13- sort_values()¶

14- groupby()¶

15- merge()¶

16- concat()¶

17- Save the dataset to an Excel file¶

18- Reverse Row Order¶

19- Reverse column order¶

20- Copy data from clipboard¶

21- Split dataframe into two subsets¶

22- Join Two Datasets¶

23- Filtering a Dataset¶

Boolean indexing¶

24- Filtering by large categories¶

value_counts()¶

25- Splitting a string into multiple columns¶

26- Convert one set of values into another¶

27- Reshaping a dataframe¶

28- loc[]¶

29- iloc[]¶

30- query()¶

------------- Jazak-Allah -------------

Author: Huzaifa Tahir
Date: January 31, 2023

Pandas ¶

-------------
Jazak-Allah
-------------

	sepal_length	sepal_width	petal_length	petal_width
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal_length	sepal_width	petal_length	petal_width
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	A	B	C	D
0	A0	B0	C0	D0
1	A1	B1	C1	D1
2	A2	B2	C2	D2
3	A3	B3	C3	D3
0	A4	B4	C4	D4
1	A5	B5	C5	D5
2	A6	B6	C6	D6
3	A7	B7	C7	D7