{ "cells": [ { "cell_type": "markdown", "id": "3811e0f8-a8e5-4c26-81fa-23be62a23463", "metadata": {}, "source": [ "# Unión de dataframes" ] }, { "cell_type": "code", "execution_count": 3, "id": "8fb28ec5-ca22-45fc-af77-04c305380951", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 13, "id": "bfb1ac75-48e4-48c3-980b-004a23897ebe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_nameage
0OralieFidgeon30
1ImojeanBenet21
2MicheleWoodlands29
3AilbertRisdale22
4StevyMacGorman24
\n", "
" ], "text/plain": [ " first_name last_name age\n", "0 Oralie Fidgeon 30\n", "1 Imojean Benet 21\n", "2 Michele Woodlands 29\n", "3 Ailbert Risdale 22\n", "4 Stevy MacGorman 24" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Generamos los sets de datos\n", "clients = {'first_name' : ['Oralie' ,'Imojean' ,'Michele', 'Ailbert', 'Stevy'],\n", " 'last_name' : ['Fidgeon' ,'Benet' ,'Woodlands', 'Risdale', 'MacGorman'],\n", " 'age' : [30 ,21 ,29 ,22, 24]}\n", "clients = pd.DataFrame(clients, columns = ['first_name', 'last_name', 'age'])\n", "clients" ] }, { "cell_type": "code", "execution_count": 14, "id": "bdf57f04-1dc5-4a9e-94a5-88a6d5140067", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
invoice_idclient_idamount
01377.91
12224.36
23774.65
34219.75
45727.46
56317.13
67145.77
78481.70
89214.41
910352.69
1011632.03
1112212.78
\n", "
" ], "text/plain": [ " invoice_id client_id amount\n", "0 1 3 77.91\n", "1 2 2 24.36\n", "2 3 7 74.65\n", "3 4 2 19.75\n", "4 5 7 27.46\n", "5 6 3 17.13\n", "6 7 1 45.77\n", "7 8 4 81.70\n", "8 9 2 14.41\n", "9 10 3 52.69\n", "10 11 6 32.03\n", "11 12 2 12.78" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Generamos ahora otro dataset con los importes pagados por los clientes (facturas)\n", "invoices = {'invoice_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],\n", " 'client_id' : [3, 2, 7, 2, 7, 3, 1, 4 ,2, 3, 6, 2],\n", " 'amount': [77.91, 24.36, 74.65, 19.75, 27.46, 17.13, 45.77, 81.7, 14.41, 52.69, 32.03, 12.78]}\n", "invoices = pd.DataFrame(invoices, columns = ['invoice_id', 'client_id', 'amount'])\n", "invoices" ] }, { "cell_type": "markdown", "id": "fcfaba4f-5e7b-4328-8334-967a3d984404", "metadata": {}, "source": [ "## Añadir un nuevo cliente a los datos" ] }, { "cell_type": "code", "execution_count": 15, "id": "a7f36f34-31f1-4b89-94e2-0637877303e9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_nameage
0OralieFidgeon30
1ImojeanBenet21
2MicheleWoodlands29
3AilbertRisdale22
4StevyMacGorman24
0RebeMacCrossan21
\n", "
" ], "text/plain": [ " first_name last_name age\n", "0 Oralie Fidgeon 30\n", "1 Imojean Benet 21\n", "2 Michele Woodlands 29\n", "3 Ailbert Risdale 22\n", "4 Stevy MacGorman 24\n", "0 Rebe MacCrossan 21" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_clients = pd.DataFrame({'first_name' : ['Rebe'],\n", " 'last_name' : ['MacCrossan'],\n", " 'age' : [21]},\n", " columns = ['first_name', 'last_name', 'age'])\n", "clients = pd.concat([clients, new_clients])\n", "clients" ] }, { "cell_type": "code", "execution_count": 9, "id": "badf6a40-3d81-4024-971b-8ad9f047c8d4", "metadata": {}, "outputs": [], "source": [ "# Vemos que al concatenar los datos, se han mantenido los índices originales de \n", "# los dataframes y esto puede ser un problema más adelante. Reseteamos el índice del dataset de clientes" ] }, { "cell_type": "code", "execution_count": 17, "id": "2c31f528-34d2-43c0-bfaf-0ee7fef6141e", "metadata": {}, "outputs": [], "source": [ "clients.index = range(clients.shape[0]) # shape[0] devuelve el total de filas del objeto consultado" ] }, { "cell_type": "code", "execution_count": 21, "id": "2b8b8cc5-782b-44a2-96a0-e2ed01262ebc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_nameage
0OralieFidgeon30
1ImojeanBenet21
2MicheleWoodlands29
3AilbertRisdale22
4StevyMacGorman24
5RebeMacCrossan21
\n", "
" ], "text/plain": [ " first_name last_name age\n", "0 Oralie Fidgeon 30\n", "1 Imojean Benet 21\n", "2 Michele Woodlands 29\n", "3 Ailbert Risdale 22\n", "4 Stevy MacGorman 24\n", "5 Rebe MacCrossan 21" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clients" ] }, { "cell_type": "code", "execution_count": 22, "id": "71ff6b8b-d641-4a9b-9324-08b8ddc4108b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
client_idfirst_namelast_nameage
01OralieFidgeon30
12ImojeanBenet21
23MicheleWoodlands29
34AilbertRisdale22
45StevyMacGorman24
56RebeMacCrossan21
\n", "
" ], "text/plain": [ " client_id first_name last_name age\n", "0 1 Oralie Fidgeon 30\n", "1 2 Imojean Benet 21\n", "2 3 Michele Woodlands 29\n", "3 4 Ailbert Risdale 22\n", "4 5 Stevy MacGorman 24\n", "5 6 Rebe MacCrossan 21" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Añadimos una columna con el IdCliente, de otra manera no podríamos cruzar esta tabla con la de facturas\n", "ids = pd.DataFrame({'client_id': [1, 2, 3, 4, 5, 6]}, columns = ['client_id'])\n", "clients = pd.concat([ids, clients], axis=1,)\n", "clients" ] }, { "cell_type": "markdown", "id": "b0fa2e89-c5e2-4484-9e0a-a02df3ed1b97", "metadata": {}, "source": [ "## Unión de dataframes" ] }, { "cell_type": "code", "execution_count": 26, "id": "c93edc4b-68d9-4235-865d-c9bcbfe3cec7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
client_idfirst_namelast_nameageinvoice_idamount
01OralieFidgeon30745.77
12ImojeanBenet21224.36
22ImojeanBenet21419.75
32ImojeanBenet21914.41
42ImojeanBenet211212.78
53MicheleWoodlands29177.91
63MicheleWoodlands29617.13
73MicheleWoodlands291052.69
84AilbertRisdale22881.70
96RebeMacCrossan211132.03
\n", "
" ], "text/plain": [ " client_id first_name last_name age invoice_id amount\n", "0 1 Oralie Fidgeon 30 7 45.77\n", "1 2 Imojean Benet 21 2 24.36\n", "2 2 Imojean Benet 21 4 19.75\n", "3 2 Imojean Benet 21 9 14.41\n", "4 2 Imojean Benet 21 12 12.78\n", "5 3 Michele Woodlands 29 1 77.91\n", "6 3 Michele Woodlands 29 6 17.13\n", "7 3 Michele Woodlands 29 10 52.69\n", "8 4 Ailbert Risdale 22 8 81.70\n", "9 6 Rebe MacCrossan 21 11 32.03" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tablon = pd.merge(clients, invoices, on ='client_id')\n", "tablon" ] }, { "cell_type": "code", "execution_count": 32, "id": "cb08a679-743e-45a2-80a1-c2acf3bf3b01", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
client_idfirst_namelast_nameageinvoice_idid_clienteamount
01OralieFidgeon307145.77
12ImojeanBenet212224.36
22ImojeanBenet214219.75
32ImojeanBenet219214.41
42ImojeanBenet2112212.78
53MicheleWoodlands291377.91
63MicheleWoodlands296317.13
73MicheleWoodlands2910352.69
84AilbertRisdale228481.70
96RebeMacCrossan2111632.03
\n", "
" ], "text/plain": [ " client_id first_name last_name age invoice_id id_cliente amount\n", "0 1 Oralie Fidgeon 30 7 1 45.77\n", "1 2 Imojean Benet 21 2 2 24.36\n", "2 2 Imojean Benet 21 4 2 19.75\n", "3 2 Imojean Benet 21 9 2 14.41\n", "4 2 Imojean Benet 21 12 2 12.78\n", "5 3 Michele Woodlands 29 1 3 77.91\n", "6 3 Michele Woodlands 29 6 3 17.13\n", "7 3 Michele Woodlands 29 10 3 52.69\n", "8 4 Ailbert Risdale 22 8 4 81.70\n", "9 6 Rebe MacCrossan 21 11 6 32.03" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# En el caso de que el nombre de la columna de unión de ambas tablas fuera diferente...\n", "invoices.rename(columns = {'client_id':'id_cliente'}, inplace=True)\n", "tablon = pd.merge(clients, invoices, left_on ='client_id', right_on ='id_cliente')\n", "tablon" ] }, { "cell_type": "markdown", "id": "f74b2812-ff38-4f6c-9e57-f85de6ca4f95", "metadata": {}, "source": [ "## Diferentes tipos de unión: Left, Right, Inner y Outer" ] }, { "cell_type": "code", "execution_count": 33, "id": "38d1283a-be4f-453b-82de-f374f9fc10dc", "metadata": {}, "outputs": [], "source": [ "invoices.rename(columns = {'id_cliente':'client_id'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 35, "id": "b80c5c92-0dc9-4845-b0e7-69f17c592a39", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
client_idfirst_namelast_nameageinvoice_idamount
01OralieFidgeon307.045.77
12ImojeanBenet212.024.36
22ImojeanBenet214.019.75
32ImojeanBenet219.014.41
42ImojeanBenet2112.012.78
53MicheleWoodlands291.077.91
63MicheleWoodlands296.017.13
73MicheleWoodlands2910.052.69
84AilbertRisdale228.081.70
95StevyMacGorman24NaNNaN
106RebeMacCrossan2111.032.03
\n", "
" ], "text/plain": [ " client_id first_name last_name age invoice_id amount\n", "0 1 Oralie Fidgeon 30 7.0 45.77\n", "1 2 Imojean Benet 21 2.0 24.36\n", "2 2 Imojean Benet 21 4.0 19.75\n", "3 2 Imojean Benet 21 9.0 14.41\n", "4 2 Imojean Benet 21 12.0 12.78\n", "5 3 Michele Woodlands 29 1.0 77.91\n", "6 3 Michele Woodlands 29 6.0 17.13\n", "7 3 Michele Woodlands 29 10.0 52.69\n", "8 4 Ailbert Risdale 22 8.0 81.70\n", "9 5 Stevy MacGorman 24 NaN NaN\n", "10 6 Rebe MacCrossan 21 11.0 32.03" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left = pd.merge(clients, invoices, on ='client_id', how ='left')\n", "left" ] }, { "cell_type": "code", "execution_count": 36, "id": "efe25736-8a11-4591-956e-e3ed048586f1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
client_idfirst_namelast_nameageinvoice_idamount
03MicheleWoodlands29.0177.91
12ImojeanBenet21.0224.36
27NaNNaNNaN374.65
32ImojeanBenet21.0419.75
47NaNNaNNaN527.46
53MicheleWoodlands29.0617.13
61OralieFidgeon30.0745.77
74AilbertRisdale22.0881.70
82ImojeanBenet21.0914.41
93MicheleWoodlands29.01052.69
106RebeMacCrossan21.01132.03
112ImojeanBenet21.01212.78
\n", "
" ], "text/plain": [ " client_id first_name last_name age invoice_id amount\n", "0 3 Michele Woodlands 29.0 1 77.91\n", "1 2 Imojean Benet 21.0 2 24.36\n", "2 7 NaN NaN NaN 3 74.65\n", "3 2 Imojean Benet 21.0 4 19.75\n", "4 7 NaN NaN NaN 5 27.46\n", "5 3 Michele Woodlands 29.0 6 17.13\n", "6 1 Oralie Fidgeon 30.0 7 45.77\n", "7 4 Ailbert Risdale 22.0 8 81.70\n", "8 2 Imojean Benet 21.0 9 14.41\n", "9 3 Michele Woodlands 29.0 10 52.69\n", "10 6 Rebe MacCrossan 21.0 11 32.03\n", "11 2 Imojean Benet 21.0 12 12.78" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "right = pd.merge(clients, invoices, on ='client_id', how ='right')\n", "right" ] }, { "cell_type": "code", "execution_count": 37, "id": "b4f363b8-7600-441f-b8e3-5a78af4ddab6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
client_idfirst_namelast_nameageinvoice_idamount
01OralieFidgeon30745.77
12ImojeanBenet21224.36
22ImojeanBenet21419.75
32ImojeanBenet21914.41
42ImojeanBenet211212.78
53MicheleWoodlands29177.91
63MicheleWoodlands29617.13
73MicheleWoodlands291052.69
84AilbertRisdale22881.70
96RebeMacCrossan211132.03
\n", "
" ], "text/plain": [ " client_id first_name last_name age invoice_id amount\n", "0 1 Oralie Fidgeon 30 7 45.77\n", "1 2 Imojean Benet 21 2 24.36\n", "2 2 Imojean Benet 21 4 19.75\n", "3 2 Imojean Benet 21 9 14.41\n", "4 2 Imojean Benet 21 12 12.78\n", "5 3 Michele Woodlands 29 1 77.91\n", "6 3 Michele Woodlands 29 6 17.13\n", "7 3 Michele Woodlands 29 10 52.69\n", "8 4 Ailbert Risdale 22 8 81.70\n", "9 6 Rebe MacCrossan 21 11 32.03" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inner = pd.merge(clients, invoices, on ='client_id', how ='inner')\n", "inner" ] }, { "cell_type": "code", "execution_count": 38, "id": "097846a0-c9b1-4053-a30b-fb25f44460f1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
client_idfirst_namelast_nameageinvoice_idamount
01OralieFidgeon30.07.045.77
12ImojeanBenet21.02.024.36
22ImojeanBenet21.04.019.75
32ImojeanBenet21.09.014.41
42ImojeanBenet21.012.012.78
53MicheleWoodlands29.01.077.91
63MicheleWoodlands29.06.017.13
73MicheleWoodlands29.010.052.69
84AilbertRisdale22.08.081.70
95StevyMacGorman24.0NaNNaN
106RebeMacCrossan21.011.032.03
117NaNNaNNaN3.074.65
127NaNNaNNaN5.027.46
\n", "
" ], "text/plain": [ " client_id first_name last_name age invoice_id amount\n", "0 1 Oralie Fidgeon 30.0 7.0 45.77\n", "1 2 Imojean Benet 21.0 2.0 24.36\n", "2 2 Imojean Benet 21.0 4.0 19.75\n", "3 2 Imojean Benet 21.0 9.0 14.41\n", "4 2 Imojean Benet 21.0 12.0 12.78\n", "5 3 Michele Woodlands 29.0 1.0 77.91\n", "6 3 Michele Woodlands 29.0 6.0 17.13\n", "7 3 Michele Woodlands 29.0 10.0 52.69\n", "8 4 Ailbert Risdale 22.0 8.0 81.70\n", "9 5 Stevy MacGorman 24.0 NaN NaN\n", "10 6 Rebe MacCrossan 21.0 11.0 32.03\n", "11 7 NaN NaN NaN 3.0 74.65\n", "12 7 NaN NaN NaN 5.0 27.46" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "outer = pd.merge(clients, invoices, on ='client_id', how ='outer')\n", "outer" ] }, { "cell_type": "code", "execution_count": 39, "id": "ebb5d5a9-1d1e-4879-bc72-3bc1a42c2a4e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
client_idfirst_name_1last_name_1age_1first_name_2last_name_2age_2
01OralieFidgeon30OralieFidgeon30
12ImojeanBenet21ImojeanBenet21
23MicheleWoodlands29MicheleWoodlands29
34AilbertRisdale22AilbertRisdale22
45StevyMacGorman24StevyMacGorman24
56RebeMacCrossan21RebeMacCrossan21
\n", "
" ], "text/plain": [ " client_id first_name_1 last_name_1 age_1 first_name_2 last_name_2 age_2\n", "0 1 Oralie Fidgeon 30 Oralie Fidgeon 30\n", "1 2 Imojean Benet 21 Imojean Benet 21\n", "2 3 Michele Woodlands 29 Michele Woodlands 29\n", "3 4 Ailbert Risdale 22 Ailbert Risdale 22\n", "4 5 Stevy MacGorman 24 Stevy MacGorman 24\n", "5 6 Rebe MacCrossan 21 Rebe MacCrossan 21" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Finalmente si en los data frames existen columnas con el mismo nombre \n", "# se puede indicar un sufijo para identificar el origen de cada una. \n", "# De esta manera se puede identificar el origen de cada una de ellas.\n", "pd.merge(clients, clients, on='client_id', suffixes=('_1', '_2'))\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 }