{ "cells": [ { "cell_type": "markdown", "id": "25ef58bf-d727-47a4-973d-c531a4222de3", "metadata": {}, "source": [ "# Ejercicio Transacciones. Parte 1 - Preparación de los datos." ] }, { "cell_type": "markdown", "id": "0fd9e399-5dc7-4aeb-8d67-2d6fce697f33", "metadata": {}, "source": [ "Ejercicio dividido en varias partes con el que analizaremos un conjunto de datos de transacciones reales efectuadas en el área del gran Bilbao." ] }, { "cell_type": "code", "execution_count": 6, "id": "fad1447d-9b67-40d4-8031-1804ad13f249", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from datetime import datetime\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "pd.options.display.float_format = '{:.2f}'.format #Desactivar notación científica en pandas:\n", "np.set_printoptions(suppress=True) #Desactivar notación científica en numpy:\n", "pd.set_option('display.max_columns', None) #comando para mostrar todas las columnas" ] }, { "cell_type": "markdown", "id": "e9978095-2516-4cac-88d2-f00dc8c35243", "metadata": {}, "source": [ "#### Cargamos el primer set de datos con el que vamos a trabajar" ] }, { "cell_type": "code", "execution_count": 59, "id": "444906b0-937f-4b76-82fd-13d0b23a30d1", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('./data/transacciones.csv', sep=';', decimal=',')" ] }, { "cell_type": "code", "execution_count": 60, "id": "694ece12-9394-4815-802b-54b5b430c02a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FechaCódigo PostalTipo de ComercioTransaccionesImporte Medio
02015010148004es_parking89.24
12015010148004es_pharmacy315.40
22015010148004es_taxi219.80
32015010148004es_gas252.98
42015010148004es_fastfood229.25
..................
1109432015123148014es_cafe136.00
1109442015123148014es_hospital150.00
1109452015123148014es_taxi122.50
1109462015123148014es_wellness122.00
1109472015123148014es_leather159.00
\n", "

110948 rows × 5 columns

\n", "
" ], "text/plain": [ " Fecha Código Postal Tipo de Comercio Transacciones Importe Medio\n", "0 20150101 48004 es_parking 8 9.24\n", "1 20150101 48004 es_pharmacy 3 15.40\n", "2 20150101 48004 es_taxi 2 19.80\n", "3 20150101 48004 es_gas 2 52.98\n", "4 20150101 48004 es_fastfood 2 29.25\n", "... ... ... ... ... ...\n", "110943 20151231 48014 es_cafe 1 36.00\n", "110944 20151231 48014 es_hospital 1 50.00\n", "110945 20151231 48014 es_taxi 1 22.50\n", "110946 20151231 48014 es_wellness 1 22.00\n", "110947 20151231 48014 es_leather 1 59.00\n", "\n", "[110948 rows x 5 columns]" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 61, "id": "0e660abe-d0f2-46de-b982-aac353ec29c3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 110948 entries, 0 to 110947\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Fecha 110948 non-null int64 \n", " 1 Código Postal 110948 non-null int64 \n", " 2 Tipo de Comercio 110948 non-null object \n", " 3 Transacciones 110948 non-null int64 \n", " 4 Importe Medio 110948 non-null float64\n", "dtypes: float64(1), int64(3), object(1)\n", "memory usage: 4.2+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 62, "id": "31b29daf-ad2c-45ce-a527-8b3f245828e7", "metadata": {}, "outputs": [], "source": [ "# Modificamos el campo Fecha para que sea una fecha y el código postal a string\n", "df['Fecha'] = pd.to_datetime(df['Fecha'].astype('string'))\n", "df['Código Postal'] = df['Código Postal'].astype('string')" ] }, { "cell_type": "code", "execution_count": 63, "id": "a400ce7e-ced7-4c28-98d2-d28880012673", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 110948 entries, 0 to 110947\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Fecha 110948 non-null datetime64[ns]\n", " 1 Código Postal 110948 non-null string \n", " 2 Tipo de Comercio 110948 non-null object \n", " 3 Transacciones 110948 non-null int64 \n", " 4 Importe Medio 110948 non-null float64 \n", "dtypes: datetime64[ns](1), float64(1), int64(1), object(1), string(1)\n", "memory usage: 4.2+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 65, "id": "010675b4-cd11-4e3b-8cbb-a1a33beb4b42", "metadata": {}, "outputs": [], "source": [ "# Enriquecemos un poco...\n", "df['Importe Total'] = df['Importe Medio'] * df['Transacciones']" ] }, { "cell_type": "code", "execution_count": 66, "id": "4a83d323-62a8-4bba-8232-fead4c3fa829", "metadata": {}, "outputs": [], "source": [ "# Guardamos el resultado\n", "df.to_csv('./data/transacciones2.csv', header=True, encoding = 'UTF-8', sep=\";\", decimal=',', index=False)" ] }, { "cell_type": "markdown", "id": "2c314b19-f6aa-419b-9263-3589ab573004", "metadata": {}, "source": [ "#### Cargamos el segundo set de datos" ] }, { "cell_type": "code", "execution_count": 49, "id": "b501b861-caa3-4248-abe7-312d410590b0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 365 entries, 0 to 364\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Fecha 365 non-null object \n", " 1 Temperatura media (_C) 365 non-null float64\n", " 2 Humedad media (%) 358 non-null float64\n", " 3 Precipitaciones media (l/m2) 365 non-null float64\n", " 4 Viento medio (km/h) 365 non-null float64\n", "dtypes: float64(4), object(1)\n", "memory usage: 14.4+ KB\n" ] } ], "source": [ "clima = pd.read_csv('./data/clima.csv', sep=\";\", decimal=',')\n", "clima.info()" ] }, { "cell_type": "code", "execution_count": 50, "id": "be470227-d1b0-44c8-a355-a9f7ae6328f9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FechaTemperatura media (_C)Humedad media (%)Precipitaciones media (l/m2)Viento medio (km/h)
001/01/156.1064.000.0014.20
102/01/156.9066.000.0010.40
203/01/158.2071.000.009.80
304/01/1510.6079.000.008.00
405/01/159.0077.000.0011.10
..................
36027/12/1514.7041.000.0019.00
36128/12/1515.8052.004.3022.50
36229/12/1514.7050.000.0019.90
36330/12/1516.0052.000.5027.60
36431/12/1511.5083.002.608.90
\n", "

365 rows × 5 columns

\n", "
" ], "text/plain": [ " Fecha Temperatura media (_C) Humedad media (%) \\\n", "0 01/01/15 6.10 64.00 \n", "1 02/01/15 6.90 66.00 \n", "2 03/01/15 8.20 71.00 \n", "3 04/01/15 10.60 79.00 \n", "4 05/01/15 9.00 77.00 \n", ".. ... ... ... \n", "360 27/12/15 14.70 41.00 \n", "361 28/12/15 15.80 52.00 \n", "362 29/12/15 14.70 50.00 \n", "363 30/12/15 16.00 52.00 \n", "364 31/12/15 11.50 83.00 \n", "\n", " Precipitaciones media (l/m2) Viento medio (km/h) \n", "0 0.00 14.20 \n", "1 0.00 10.40 \n", "2 0.00 9.80 \n", "3 0.00 8.00 \n", "4 0.00 11.10 \n", ".. ... ... \n", "360 0.00 19.00 \n", "361 4.30 22.50 \n", "362 0.00 19.90 \n", "363 0.50 27.60 \n", "364 2.60 8.90 \n", "\n", "[365 rows x 5 columns]" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clima" ] }, { "cell_type": "code", "execution_count": 51, "id": "b4776551-f0bb-4d64-860a-7afa9dbe735d", "metadata": {}, "outputs": [], "source": [ "# Como en el set previo, corregimos el tipo de la columna fecha\n", "clima['Fecha'] = pd.to_datetime(clima['Fecha'].astype('string'))" ] }, { "cell_type": "code", "execution_count": 52, "id": "7f8ed545-98d9-45dc-bb71-2757cca9ccc6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 365 entries, 0 to 364\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Fecha 365 non-null datetime64[ns]\n", " 1 Temperatura media (_C) 365 non-null float64 \n", " 2 Humedad media (%) 358 non-null float64 \n", " 3 Precipitaciones media (l/m2) 365 non-null float64 \n", " 4 Viento medio (km/h) 365 non-null float64 \n", "dtypes: datetime64[ns](1), float64(4)\n", "memory usage: 14.4 KB\n" ] } ], "source": [ "clima.info()" ] }, { "cell_type": "code", "execution_count": 53, "id": "d2b5a451-c01a-4f34-87f3-b7b65e1afedf", "metadata": {}, "outputs": [], "source": [ "clima.to_csv('./data/clima2.csv', sep=';', decimal=',', encoding='UTF-8', index=False)" ] }, { "cell_type": "markdown", "id": "f34455ca-a6b5-4a2f-bb1c-50420ac447f7", "metadata": {}, "source": [ "#### Tercer dataset a modificar. Dataset con los festivos del 2015" ] }, { "cell_type": "code", "execution_count": 54, "id": "160b020e-cdf7-43b9-b94c-3ee5dabbf887", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 21 entries, 0 to 20\n", "Data columns (total 2 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Fecha 21 non-null object\n", " 1 Descripcion 21 non-null object\n", "dtypes: object(2)\n", "memory usage: 464.0+ bytes\n" ] } ], "source": [ "datos = pd.read_csv('./data/festivos.csv', sep=';')\n", "datos.info()" ] }, { "cell_type": "code", "execution_count": 55, "id": "3b8264ed-0f40-4caa-8eb0-7aa372d9fa27", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FechaDescripcion
001/01/2015Anio nuevo
106/01/2015Epifania del Senor
219/03/2015San Jose
302/04/2015Jueves Santo
403/04/2015Viernes Santo
506/04/2015Lunes de Pascua de Resurreccion
601/05/2015Fiesta del Trabajo
725/07/2015Santiago Apostol
815/08/2015Asuncion de la Virgen
922/08/2015Aste Nagusi
1023/08/2015Aste Nagusi
1124/08/2015Aste Nagusi
1225/08/2015Aste Nagusi
1326/08/2015Aste Nagusi
1427/08/2015Aste Nagusi
1528/08/2015Viernes de la Semana Grande
1629/08/2015Aste Nagusi
1730/08/2015Aste Nagusi
1812/10/2015Fiesta Nacional
1908/12/2015Inmaculada Concepcion
2025/12/2015Natividad del Senor
\n", "
" ], "text/plain": [ " Fecha Descripcion\n", "0 01/01/2015 Anio nuevo\n", "1 06/01/2015 Epifania del Senor \n", "2 19/03/2015 San Jose\n", "3 02/04/2015 Jueves Santo\n", "4 03/04/2015 Viernes Santo\n", "5 06/04/2015 Lunes de Pascua de Resurreccion\n", "6 01/05/2015 Fiesta del Trabajo\n", "7 25/07/2015 Santiago Apostol\n", "8 15/08/2015 Asuncion de la Virgen\n", "9 22/08/2015 Aste Nagusi\n", "10 23/08/2015 Aste Nagusi\n", "11 24/08/2015 Aste Nagusi\n", "12 25/08/2015 Aste Nagusi\n", "13 26/08/2015 Aste Nagusi\n", "14 27/08/2015 Aste Nagusi\n", "15 28/08/2015 Viernes de la Semana Grande\n", "16 29/08/2015 Aste Nagusi\n", "17 30/08/2015 Aste Nagusi\n", "18 12/10/2015 Fiesta Nacional\n", "19 08/12/2015 Inmaculada Concepcion\n", "20 25/12/2015 Natividad del Senor" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "datos" ] }, { "cell_type": "code", "execution_count": 56, "id": "a5b4e607-37c0-4b72-8554-48fb9d84e658", "metadata": {}, "outputs": [], "source": [ "# Convertimos la fecha en fecha\n", "datos['Fecha'] = pd.to_datetime(datos['Fecha'].astype('string'))" ] }, { "cell_type": "code", "execution_count": 57, "id": "1e4a5ee2-b935-49d8-b357-b423a5f2935a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 21 entries, 0 to 20\n", "Data columns (total 2 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Fecha 21 non-null datetime64[ns]\n", " 1 Descripcion 21 non-null object \n", "dtypes: datetime64[ns](1), object(1)\n", "memory usage: 464.0+ bytes\n" ] } ], "source": [ "datos.info()" ] }, { "cell_type": "code", "execution_count": 58, "id": "df6428f7-7beb-4e54-b777-6c75b75b32f7", "metadata": {}, "outputs": [], "source": [ "datos.to_csv('./data/festivos2.csv', sep=';', encoding='UTF-8', index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 }