{
"cells": [
{
"cell_type": "markdown",
"id": "3811e0f8-a8e5-4c26-81fa-23be62a23463",
"metadata": {},
"source": [
"# Unión de dataframes"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8fb28ec5-ca22-45fc-af77-04c305380951",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "bfb1ac75-48e4-48c3-980b-004a23897ebe",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
"
\n",
" \n",
" | 1 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
"
\n",
" \n",
" | 2 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
"
\n",
" \n",
" | 3 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
"
\n",
" \n",
" | 4 | \n",
" Stevy | \n",
" MacGorman | \n",
" 24 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first_name last_name age\n",
"0 Oralie Fidgeon 30\n",
"1 Imojean Benet 21\n",
"2 Michele Woodlands 29\n",
"3 Ailbert Risdale 22\n",
"4 Stevy MacGorman 24"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Generamos los sets de datos\n",
"clients = {'first_name' : ['Oralie' ,'Imojean' ,'Michele', 'Ailbert', 'Stevy'],\n",
" 'last_name' : ['Fidgeon' ,'Benet' ,'Woodlands', 'Risdale', 'MacGorman'],\n",
" 'age' : [30 ,21 ,29 ,22, 24]}\n",
"clients = pd.DataFrame(clients, columns = ['first_name', 'last_name', 'age'])\n",
"clients"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "bdf57f04-1dc5-4a9e-94a5-88a6d5140067",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" invoice_id | \n",
" client_id | \n",
" amount | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 3 | \n",
" 77.91 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 2 | \n",
" 24.36 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 7 | \n",
" 74.65 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" 2 | \n",
" 19.75 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" 7 | \n",
" 27.46 | \n",
"
\n",
" \n",
" | 5 | \n",
" 6 | \n",
" 3 | \n",
" 17.13 | \n",
"
\n",
" \n",
" | 6 | \n",
" 7 | \n",
" 1 | \n",
" 45.77 | \n",
"
\n",
" \n",
" | 7 | \n",
" 8 | \n",
" 4 | \n",
" 81.70 | \n",
"
\n",
" \n",
" | 8 | \n",
" 9 | \n",
" 2 | \n",
" 14.41 | \n",
"
\n",
" \n",
" | 9 | \n",
" 10 | \n",
" 3 | \n",
" 52.69 | \n",
"
\n",
" \n",
" | 10 | \n",
" 11 | \n",
" 6 | \n",
" 32.03 | \n",
"
\n",
" \n",
" | 11 | \n",
" 12 | \n",
" 2 | \n",
" 12.78 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" invoice_id client_id amount\n",
"0 1 3 77.91\n",
"1 2 2 24.36\n",
"2 3 7 74.65\n",
"3 4 2 19.75\n",
"4 5 7 27.46\n",
"5 6 3 17.13\n",
"6 7 1 45.77\n",
"7 8 4 81.70\n",
"8 9 2 14.41\n",
"9 10 3 52.69\n",
"10 11 6 32.03\n",
"11 12 2 12.78"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Generamos ahora otro dataset con los importes pagados por los clientes (facturas)\n",
"invoices = {'invoice_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],\n",
" 'client_id' : [3, 2, 7, 2, 7, 3, 1, 4 ,2, 3, 6, 2],\n",
" 'amount': [77.91, 24.36, 74.65, 19.75, 27.46, 17.13, 45.77, 81.7, 14.41, 52.69, 32.03, 12.78]}\n",
"invoices = pd.DataFrame(invoices, columns = ['invoice_id', 'client_id', 'amount'])\n",
"invoices"
]
},
{
"cell_type": "markdown",
"id": "fcfaba4f-5e7b-4328-8334-967a3d984404",
"metadata": {},
"source": [
"## Añadir un nuevo cliente a los datos"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "a7f36f34-31f1-4b89-94e2-0637877303e9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
"
\n",
" \n",
" | 1 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
"
\n",
" \n",
" | 2 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
"
\n",
" \n",
" | 3 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
"
\n",
" \n",
" | 4 | \n",
" Stevy | \n",
" MacGorman | \n",
" 24 | \n",
"
\n",
" \n",
" | 0 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first_name last_name age\n",
"0 Oralie Fidgeon 30\n",
"1 Imojean Benet 21\n",
"2 Michele Woodlands 29\n",
"3 Ailbert Risdale 22\n",
"4 Stevy MacGorman 24\n",
"0 Rebe MacCrossan 21"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_clients = pd.DataFrame({'first_name' : ['Rebe'],\n",
" 'last_name' : ['MacCrossan'],\n",
" 'age' : [21]},\n",
" columns = ['first_name', 'last_name', 'age'])\n",
"clients = pd.concat([clients, new_clients])\n",
"clients"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "badf6a40-3d81-4024-971b-8ad9f047c8d4",
"metadata": {},
"outputs": [],
"source": [
"# Vemos que al concatenar los datos, se han mantenido los índices originales de \n",
"# los dataframes y esto puede ser un problema más adelante. Reseteamos el índice del dataset de clientes"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "2c31f528-34d2-43c0-bfaf-0ee7fef6141e",
"metadata": {},
"outputs": [],
"source": [
"clients.index = range(clients.shape[0]) # shape[0] devuelve el total de filas del objeto consultado"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "2b8b8cc5-782b-44a2-96a0-e2ed01262ebc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
"
\n",
" \n",
" | 1 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
"
\n",
" \n",
" | 2 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
"
\n",
" \n",
" | 3 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
"
\n",
" \n",
" | 4 | \n",
" Stevy | \n",
" MacGorman | \n",
" 24 | \n",
"
\n",
" \n",
" | 5 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first_name last_name age\n",
"0 Oralie Fidgeon 30\n",
"1 Imojean Benet 21\n",
"2 Michele Woodlands 29\n",
"3 Ailbert Risdale 22\n",
"4 Stevy MacGorman 24\n",
"5 Rebe MacCrossan 21"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clients"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "71ff6b8b-d641-4a9b-9324-08b8ddc4108b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" client_id | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Stevy | \n",
" MacGorman | \n",
" 24 | \n",
"
\n",
" \n",
" | 5 | \n",
" 6 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" client_id first_name last_name age\n",
"0 1 Oralie Fidgeon 30\n",
"1 2 Imojean Benet 21\n",
"2 3 Michele Woodlands 29\n",
"3 4 Ailbert Risdale 22\n",
"4 5 Stevy MacGorman 24\n",
"5 6 Rebe MacCrossan 21"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Añadimos una columna con el IdCliente, de otra manera no podríamos cruzar esta tabla con la de facturas\n",
"ids = pd.DataFrame({'client_id': [1, 2, 3, 4, 5, 6]}, columns = ['client_id'])\n",
"clients = pd.concat([ids, clients], axis=1,)\n",
"clients"
]
},
{
"cell_type": "markdown",
"id": "b0fa2e89-c5e2-4484-9e0a-a02df3ed1b97",
"metadata": {},
"source": [
"## Unión de dataframes"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "c93edc4b-68d9-4235-865d-c9bcbfe3cec7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" client_id | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
" invoice_id | \n",
" amount | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
" 7 | \n",
" 45.77 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 2 | \n",
" 24.36 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 4 | \n",
" 19.75 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 9 | \n",
" 14.41 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 12 | \n",
" 12.78 | \n",
"
\n",
" \n",
" | 5 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 1 | \n",
" 77.91 | \n",
"
\n",
" \n",
" | 6 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 6 | \n",
" 17.13 | \n",
"
\n",
" \n",
" | 7 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 10 | \n",
" 52.69 | \n",
"
\n",
" \n",
" | 8 | \n",
" 4 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
" 8 | \n",
" 81.70 | \n",
"
\n",
" \n",
" | 9 | \n",
" 6 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
" 11 | \n",
" 32.03 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" client_id first_name last_name age invoice_id amount\n",
"0 1 Oralie Fidgeon 30 7 45.77\n",
"1 2 Imojean Benet 21 2 24.36\n",
"2 2 Imojean Benet 21 4 19.75\n",
"3 2 Imojean Benet 21 9 14.41\n",
"4 2 Imojean Benet 21 12 12.78\n",
"5 3 Michele Woodlands 29 1 77.91\n",
"6 3 Michele Woodlands 29 6 17.13\n",
"7 3 Michele Woodlands 29 10 52.69\n",
"8 4 Ailbert Risdale 22 8 81.70\n",
"9 6 Rebe MacCrossan 21 11 32.03"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tablon = pd.merge(clients, invoices, on ='client_id')\n",
"tablon"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "cb08a679-743e-45a2-80a1-c2acf3bf3b01",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" client_id | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
" invoice_id | \n",
" id_cliente | \n",
" amount | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
" 7 | \n",
" 1 | \n",
" 45.77 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 2 | \n",
" 2 | \n",
" 24.36 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 4 | \n",
" 2 | \n",
" 19.75 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 9 | \n",
" 2 | \n",
" 14.41 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 12 | \n",
" 2 | \n",
" 12.78 | \n",
"
\n",
" \n",
" | 5 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 1 | \n",
" 3 | \n",
" 77.91 | \n",
"
\n",
" \n",
" | 6 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 6 | \n",
" 3 | \n",
" 17.13 | \n",
"
\n",
" \n",
" | 7 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 10 | \n",
" 3 | \n",
" 52.69 | \n",
"
\n",
" \n",
" | 8 | \n",
" 4 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
" 8 | \n",
" 4 | \n",
" 81.70 | \n",
"
\n",
" \n",
" | 9 | \n",
" 6 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
" 11 | \n",
" 6 | \n",
" 32.03 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" client_id first_name last_name age invoice_id id_cliente amount\n",
"0 1 Oralie Fidgeon 30 7 1 45.77\n",
"1 2 Imojean Benet 21 2 2 24.36\n",
"2 2 Imojean Benet 21 4 2 19.75\n",
"3 2 Imojean Benet 21 9 2 14.41\n",
"4 2 Imojean Benet 21 12 2 12.78\n",
"5 3 Michele Woodlands 29 1 3 77.91\n",
"6 3 Michele Woodlands 29 6 3 17.13\n",
"7 3 Michele Woodlands 29 10 3 52.69\n",
"8 4 Ailbert Risdale 22 8 4 81.70\n",
"9 6 Rebe MacCrossan 21 11 6 32.03"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# En el caso de que el nombre de la columna de unión de ambas tablas fuera diferente...\n",
"invoices.rename(columns = {'client_id':'id_cliente'}, inplace=True)\n",
"tablon = pd.merge(clients, invoices, left_on ='client_id', right_on ='id_cliente')\n",
"tablon"
]
},
{
"cell_type": "markdown",
"id": "f74b2812-ff38-4f6c-9e57-f85de6ca4f95",
"metadata": {},
"source": [
"## Diferentes tipos de unión: Left, Right, Inner y Outer"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "38d1283a-be4f-453b-82de-f374f9fc10dc",
"metadata": {},
"outputs": [],
"source": [
"invoices.rename(columns = {'id_cliente':'client_id'}, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "b80c5c92-0dc9-4845-b0e7-69f17c592a39",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" client_id | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
" invoice_id | \n",
" amount | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
" 7.0 | \n",
" 45.77 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 2.0 | \n",
" 24.36 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 4.0 | \n",
" 19.75 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 9.0 | \n",
" 14.41 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 12.0 | \n",
" 12.78 | \n",
"
\n",
" \n",
" | 5 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 1.0 | \n",
" 77.91 | \n",
"
\n",
" \n",
" | 6 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 6.0 | \n",
" 17.13 | \n",
"
\n",
" \n",
" | 7 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 10.0 | \n",
" 52.69 | \n",
"
\n",
" \n",
" | 8 | \n",
" 4 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
" 8.0 | \n",
" 81.70 | \n",
"
\n",
" \n",
" | 9 | \n",
" 5 | \n",
" Stevy | \n",
" MacGorman | \n",
" 24 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 10 | \n",
" 6 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
" 11.0 | \n",
" 32.03 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" client_id first_name last_name age invoice_id amount\n",
"0 1 Oralie Fidgeon 30 7.0 45.77\n",
"1 2 Imojean Benet 21 2.0 24.36\n",
"2 2 Imojean Benet 21 4.0 19.75\n",
"3 2 Imojean Benet 21 9.0 14.41\n",
"4 2 Imojean Benet 21 12.0 12.78\n",
"5 3 Michele Woodlands 29 1.0 77.91\n",
"6 3 Michele Woodlands 29 6.0 17.13\n",
"7 3 Michele Woodlands 29 10.0 52.69\n",
"8 4 Ailbert Risdale 22 8.0 81.70\n",
"9 5 Stevy MacGorman 24 NaN NaN\n",
"10 6 Rebe MacCrossan 21 11.0 32.03"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"left = pd.merge(clients, invoices, on ='client_id', how ='left')\n",
"left"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "efe25736-8a11-4591-956e-e3ed048586f1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" client_id | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
" invoice_id | \n",
" amount | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29.0 | \n",
" 1 | \n",
" 77.91 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21.0 | \n",
" 2 | \n",
" 24.36 | \n",
"
\n",
" \n",
" | 2 | \n",
" 7 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 3 | \n",
" 74.65 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21.0 | \n",
" 4 | \n",
" 19.75 | \n",
"
\n",
" \n",
" | 4 | \n",
" 7 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 5 | \n",
" 27.46 | \n",
"
\n",
" \n",
" | 5 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29.0 | \n",
" 6 | \n",
" 17.13 | \n",
"
\n",
" \n",
" | 6 | \n",
" 1 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30.0 | \n",
" 7 | \n",
" 45.77 | \n",
"
\n",
" \n",
" | 7 | \n",
" 4 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22.0 | \n",
" 8 | \n",
" 81.70 | \n",
"
\n",
" \n",
" | 8 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21.0 | \n",
" 9 | \n",
" 14.41 | \n",
"
\n",
" \n",
" | 9 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29.0 | \n",
" 10 | \n",
" 52.69 | \n",
"
\n",
" \n",
" | 10 | \n",
" 6 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21.0 | \n",
" 11 | \n",
" 32.03 | \n",
"
\n",
" \n",
" | 11 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21.0 | \n",
" 12 | \n",
" 12.78 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" client_id first_name last_name age invoice_id amount\n",
"0 3 Michele Woodlands 29.0 1 77.91\n",
"1 2 Imojean Benet 21.0 2 24.36\n",
"2 7 NaN NaN NaN 3 74.65\n",
"3 2 Imojean Benet 21.0 4 19.75\n",
"4 7 NaN NaN NaN 5 27.46\n",
"5 3 Michele Woodlands 29.0 6 17.13\n",
"6 1 Oralie Fidgeon 30.0 7 45.77\n",
"7 4 Ailbert Risdale 22.0 8 81.70\n",
"8 2 Imojean Benet 21.0 9 14.41\n",
"9 3 Michele Woodlands 29.0 10 52.69\n",
"10 6 Rebe MacCrossan 21.0 11 32.03\n",
"11 2 Imojean Benet 21.0 12 12.78"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"right = pd.merge(clients, invoices, on ='client_id', how ='right')\n",
"right"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "b4f363b8-7600-441f-b8e3-5a78af4ddab6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" client_id | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
" invoice_id | \n",
" amount | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
" 7 | \n",
" 45.77 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 2 | \n",
" 24.36 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 4 | \n",
" 19.75 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 9 | \n",
" 14.41 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" 12 | \n",
" 12.78 | \n",
"
\n",
" \n",
" | 5 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 1 | \n",
" 77.91 | \n",
"
\n",
" \n",
" | 6 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 6 | \n",
" 17.13 | \n",
"
\n",
" \n",
" | 7 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" 10 | \n",
" 52.69 | \n",
"
\n",
" \n",
" | 8 | \n",
" 4 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
" 8 | \n",
" 81.70 | \n",
"
\n",
" \n",
" | 9 | \n",
" 6 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
" 11 | \n",
" 32.03 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" client_id first_name last_name age invoice_id amount\n",
"0 1 Oralie Fidgeon 30 7 45.77\n",
"1 2 Imojean Benet 21 2 24.36\n",
"2 2 Imojean Benet 21 4 19.75\n",
"3 2 Imojean Benet 21 9 14.41\n",
"4 2 Imojean Benet 21 12 12.78\n",
"5 3 Michele Woodlands 29 1 77.91\n",
"6 3 Michele Woodlands 29 6 17.13\n",
"7 3 Michele Woodlands 29 10 52.69\n",
"8 4 Ailbert Risdale 22 8 81.70\n",
"9 6 Rebe MacCrossan 21 11 32.03"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"inner = pd.merge(clients, invoices, on ='client_id', how ='inner')\n",
"inner"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "097846a0-c9b1-4053-a30b-fb25f44460f1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" client_id | \n",
" first_name | \n",
" last_name | \n",
" age | \n",
" invoice_id | \n",
" amount | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30.0 | \n",
" 7.0 | \n",
" 45.77 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21.0 | \n",
" 2.0 | \n",
" 24.36 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21.0 | \n",
" 4.0 | \n",
" 19.75 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21.0 | \n",
" 9.0 | \n",
" 14.41 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21.0 | \n",
" 12.0 | \n",
" 12.78 | \n",
"
\n",
" \n",
" | 5 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29.0 | \n",
" 1.0 | \n",
" 77.91 | \n",
"
\n",
" \n",
" | 6 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29.0 | \n",
" 6.0 | \n",
" 17.13 | \n",
"
\n",
" \n",
" | 7 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29.0 | \n",
" 10.0 | \n",
" 52.69 | \n",
"
\n",
" \n",
" | 8 | \n",
" 4 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22.0 | \n",
" 8.0 | \n",
" 81.70 | \n",
"
\n",
" \n",
" | 9 | \n",
" 5 | \n",
" Stevy | \n",
" MacGorman | \n",
" 24.0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 10 | \n",
" 6 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21.0 | \n",
" 11.0 | \n",
" 32.03 | \n",
"
\n",
" \n",
" | 11 | \n",
" 7 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 3.0 | \n",
" 74.65 | \n",
"
\n",
" \n",
" | 12 | \n",
" 7 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 5.0 | \n",
" 27.46 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" client_id first_name last_name age invoice_id amount\n",
"0 1 Oralie Fidgeon 30.0 7.0 45.77\n",
"1 2 Imojean Benet 21.0 2.0 24.36\n",
"2 2 Imojean Benet 21.0 4.0 19.75\n",
"3 2 Imojean Benet 21.0 9.0 14.41\n",
"4 2 Imojean Benet 21.0 12.0 12.78\n",
"5 3 Michele Woodlands 29.0 1.0 77.91\n",
"6 3 Michele Woodlands 29.0 6.0 17.13\n",
"7 3 Michele Woodlands 29.0 10.0 52.69\n",
"8 4 Ailbert Risdale 22.0 8.0 81.70\n",
"9 5 Stevy MacGorman 24.0 NaN NaN\n",
"10 6 Rebe MacCrossan 21.0 11.0 32.03\n",
"11 7 NaN NaN NaN 3.0 74.65\n",
"12 7 NaN NaN NaN 5.0 27.46"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outer = pd.merge(clients, invoices, on ='client_id', how ='outer')\n",
"outer"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "ebb5d5a9-1d1e-4879-bc72-3bc1a42c2a4e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" client_id | \n",
" first_name_1 | \n",
" last_name_1 | \n",
" age_1 | \n",
" first_name_2 | \n",
" last_name_2 | \n",
" age_2 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
" Oralie | \n",
" Fidgeon | \n",
" 30 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
" Imojean | \n",
" Benet | \n",
" 21 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
" Michele | \n",
" Woodlands | \n",
" 29 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
" Ailbert | \n",
" Risdale | \n",
" 22 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Stevy | \n",
" MacGorman | \n",
" 24 | \n",
" Stevy | \n",
" MacGorman | \n",
" 24 | \n",
"
\n",
" \n",
" | 5 | \n",
" 6 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
" Rebe | \n",
" MacCrossan | \n",
" 21 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" client_id first_name_1 last_name_1 age_1 first_name_2 last_name_2 age_2\n",
"0 1 Oralie Fidgeon 30 Oralie Fidgeon 30\n",
"1 2 Imojean Benet 21 Imojean Benet 21\n",
"2 3 Michele Woodlands 29 Michele Woodlands 29\n",
"3 4 Ailbert Risdale 22 Ailbert Risdale 22\n",
"4 5 Stevy MacGorman 24 Stevy MacGorman 24\n",
"5 6 Rebe MacCrossan 21 Rebe MacCrossan 21"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Finalmente si en los data frames existen columnas con el mismo nombre \n",
"# se puede indicar un sufijo para identificar el origen de cada una. \n",
"# De esta manera se puede identificar el origen de cada una de ellas.\n",
"pd.merge(clients, clients, on='client_id', suffixes=('_1', '_2'))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}