{
"cells": [
{
"cell_type": "markdown",
"id": "00e7695b",
"metadata": {},
"source": [
"# Parte 1: Lectura de datos con diferentes formatos"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "de7a4f1c",
"metadata": {},
"outputs": [],
"source": [
"%reset -f"
]
},
{
"cell_type": "code",
"execution_count": 175,
"id": "75c1c998",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from datetime import datetime\n",
"# Librería para tratamiento de ficheros xml\n",
"import xml.etree.ElementTree as ET\n",
"# Librería para efectuar peticiones a páginas web\n",
"import requests\n"
]
},
{
"cell_type": "markdown",
"id": "5868e025",
"metadata": {},
"source": [
"Vamos a generar las funciones necesarias para cargar los datos en los diferentes formatos"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "a5581176",
"metadata": {},
"outputs": [],
"source": [
"# carga de archivos xml\n",
"\n",
"def cargaXml():\n",
" inicio = datetime.now()\n",
" xml_data = open('./data/calendario_laboral_2021.xml', 'r').read() # Leer archivo\n",
" root = ET.XML(xml_data) # Parsear el XML\n",
" data = []\n",
" cols = []\n",
" for i, child in enumerate(root):\n",
" data.append([subchild.text for subchild in child])\n",
" cols.append(child.tag)\n",
"\n",
" df = pd.DataFrame(data).T # Escribir en DF y transponer datos\n",
" df.columns = cols # Actualizamos nombres de columna Update column names\n",
" \n",
" return ((datetime.now()-inicio).total_seconds())"
]
},
{
"cell_type": "code",
"execution_count": 107,
"id": "7ce72dfb",
"metadata": {},
"outputs": [],
"source": [
"# carga de archivos xlsx\n",
"def cargaXlsx ():\n",
" inicio = datetime.now()\n",
" xlsx_data = pd.read_excel('./data/calendario_laboral_2021.xlsx', sheet_name='calendario_laboral_2021')\n",
" return ((datetime.now()-inicio).total_seconds())"
]
},
{
"cell_type": "code",
"execution_count": 109,
"id": "82ef1c7c",
"metadata": {},
"outputs": [],
"source": [
"# carga de archivos csv\n",
"def cargaCsv ():\n",
" inicio = datetime.now()\n",
" csv_data = pd.read_csv('./data/calendario_laboral_2021.csv', sep=';')\n",
" return ((datetime.now()-inicio).total_seconds())"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "d0ec11b5",
"metadata": {},
"outputs": [],
"source": [
"# carga de archivos json\n",
"def cargaJson ():\n",
" inicio = datetime.now()\n",
" json_data = pd.read_json('./data/calendario_laboral_2021.json')\n",
" return ((datetime.now()-inicio).total_seconds())"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "f5ad13c7",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"0.012827"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cargaXml()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "b59f147d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.068827"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cargaXlsx()"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "f67d2813",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.004919"
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cargaCsv()"
]
},
{
"cell_type": "code",
"execution_count": 121,
"id": "4e07ca51",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.012655"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cargaJson()"
]
},
{
"cell_type": "markdown",
"id": "b36d62dd",
"metadata": {},
"source": [
"Vemos que la carga de datos más rápida es a través de CSV's. También podemos cargar archivos directamente de internet..."
]
},
{
"cell_type": "code",
"execution_count": 122,
"id": "c28b6617",
"metadata": {},
"outputs": [],
"source": [
"# carga de archivos csv internet\n",
"def cargaCsvInternet ():\n",
" inicio = datetime.now()\n",
" csv_data = pd.read_csv('https://opendata.euskadi.eus/contenidos/ds_eventos/calendario_laboral_2021/opendata/calendario_laboral_2021.csv', sep=';')\n",
" return ((datetime.now()-inicio).total_seconds())"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "cc91fbf5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.242901"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cargaCsvInternet()"
]
},
{
"cell_type": "markdown",
"id": "dc7c983c",
"metadata": {},
"source": [
"Lo ponemos bonito y automatizamos... Ejecutaremos el proceso 5 veces para tener un tiempo medio de carga por tipo de archivo"
]
},
{
"cell_type": "code",
"execution_count": 148,
"id": "9d45687e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteración: 0\n",
"Iteración: 1\n",
"Iteración: 2\n",
"Iteración: 3\n",
"Iteración: 4\n"
]
}
],
"source": [
"resultados = pd.DataFrame(columns=['xml','xlsx','csv','json','csvInternet'])\n",
"tiempos = np.array([])\n",
"for a in range(5):\n",
" print ('Iteración:', a)\n",
" xml = cargaXml()\n",
" xlsx = cargaXlsx()\n",
" csv = cargaCsv()\n",
" json = cargaJson()\n",
" csvInternet = cargaCsvInternet()\n",
" \n",
" tiempos = pd.Series(np.array([xml, xlsx, csv, json, csvInternet]), index=resultados.columns)\n",
" resultados = resultados.append(tiempos, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 149,
"id": "13a1bec3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" xml | \n",
" xlsx | \n",
" csv | \n",
" json | \n",
" csvInternet | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0.013161 | \n",
" 0.072745 | \n",
" 0.002216 | \n",
" 0.007502 | \n",
" 0.886830 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0.010666 | \n",
" 0.066649 | \n",
" 0.002196 | \n",
" 0.007710 | \n",
" 0.874905 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0.017747 | \n",
" 0.076320 | \n",
" 0.002325 | \n",
" 0.008650 | \n",
" 0.892770 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.016354 | \n",
" 0.068573 | \n",
" 0.002189 | \n",
" 0.007475 | \n",
" 0.894838 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.017680 | \n",
" 0.070100 | \n",
" 0.002233 | \n",
" 0.007541 | \n",
" 0.924653 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" xml xlsx csv json csvInternet\n",
"0 0.013161 0.072745 0.002216 0.007502 0.886830\n",
"1 0.010666 0.066649 0.002196 0.007710 0.874905\n",
"2 0.017747 0.076320 0.002325 0.008650 0.892770\n",
"3 0.016354 0.068573 0.002189 0.007475 0.894838\n",
"4 0.017680 0.070100 0.002233 0.007541 0.924653"
]
},
"execution_count": 149,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"resultados"
]
},
{
"cell_type": "markdown",
"id": "1b1972bc",
"metadata": {},
"source": [
"Calculamos los tiempos medios para cada tipo de archivo"
]
},
{
"cell_type": "code",
"execution_count": 173,
"id": "f2a526eb",
"metadata": {},
"outputs": [],
"source": [
"tiemposMedios = pd.DataFrame(columns=['xml','xlsx','csv','json','csvInternet'])\n",
"\n",
"tiemposMedios.loc ['0','xml'] = round(resultados ['xml'].mean(),3)\n",
"tiemposMedios.loc ['0','xlsx'] = round(resultados ['xlsx'].mean(),3)\n",
"tiemposMedios.loc ['0','csv'] = round(resultados ['csv'].mean(),3)\n",
"tiemposMedios.loc ['0','json'] = round(resultados ['json'].mean(),3)\n",
"tiemposMedios.loc ['0','csvInternet'] = round(resultados ['csvInternet'].mean(),3)"
]
},
{
"cell_type": "code",
"execution_count": 174,
"id": "8ffe8b94",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" xml | \n",
" xlsx | \n",
" csv | \n",
" json | \n",
" csvInternet | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0.015 | \n",
" 0.071 | \n",
" 0.002 | \n",
" 0.008 | \n",
" 0.895 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" xml xlsx csv json csvInternet\n",
"0 0.015 0.071 0.002 0.008 0.895"
]
},
"execution_count": 174,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tiemposMedios"
]
},
{
"cell_type": "markdown",
"id": "c45f804e",
"metadata": {},
"source": [
"También podemos cargar tablas desde una url en internet..."
]
},
{
"cell_type": "code",
"execution_count": 195,
"id": "616c5afe",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" County Code | \n",
" Tract Code | \n",
" Tract Income Level | \n",
" Distressed or Under -served Tract | \n",
" Tract Median Family Income % | \n",
" 2011 HUD Est. MSA/MD non-MSA/MD Median Family Income | \n",
" 2011 Est. Tract Median Family Income | \n",
" 2000 Tract Median Family Income | \n",
" Tract Population | \n",
" Tract Minority % | \n",
" Minority Population | \n",
" Owner Occupied Units | \n",
" 1- to 4- Family Units | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 15 | \n",
" 1.00 | \n",
" Moderate | \n",
" No | \n",
" 74.23 | \n",
" $51,500 | \n",
" $38,228 | \n",
" $29,615 | \n",
" 2279 | \n",
" 45.77 | \n",
" 1043 | \n",
" 431 | \n",
" 894 | \n",
"
\n",
" \n",
" | 1 | \n",
" 15 | \n",
" 2.00 | \n",
" Middle | \n",
" No | \n",
" 86.40 | \n",
" $51,500 | \n",
" $44,496 | \n",
" $34,468 | \n",
" 3084 | \n",
" 42.44 | \n",
" 1309 | \n",
" 881 | \n",
" 1543 | \n",
"
\n",
" \n",
" | 2 | \n",
" 15 | \n",
" 3.00 | \n",
" Moderate | \n",
" No | \n",
" 59.27 | \n",
" $51,500 | \n",
" $30,524 | \n",
" $23,644 | \n",
" 3545 | \n",
" 78.87 | \n",
" 2796 | \n",
" 637 | \n",
" 1562 | \n",
"
\n",
" \n",
" | 3 | \n",
" 15 | \n",
" 4.00 | \n",
" Moderate | \n",
" No | \n",
" 76.24 | \n",
" $51,500 | \n",
" $39,264 | \n",
" $30,417 | \n",
" 2777 | \n",
" 59.42 | \n",
" 1650 | \n",
" 908 | \n",
" 1335 | \n",
"
\n",
" \n",
" | 4 | \n",
" 15 | \n",
" 5.00 | \n",
" Low | \n",
" No | \n",
" 40.54 | \n",
" $51,500 | \n",
" $20,878 | \n",
" $16,172 | \n",
" 2372 | \n",
" 93.55 | \n",
" 2219 | \n",
" 597 | \n",
" 1306 | \n",
"
\n",
" \n",
" | 5 | \n",
" 15 | \n",
" 6.00 | \n",
" Low | \n",
" No | \n",
" 30.36 | \n",
" $51,500 | \n",
" $15,635 | \n",
" $12,113 | \n",
" 2439 | \n",
" 77.74 | \n",
" 1896 | \n",
" 363 | \n",
" 736 | \n",
"
\n",
" \n",
" | 6 | \n",
" 15 | \n",
" 8.00 | \n",
" Moderate | \n",
" No | \n",
" 72.62 | \n",
" $51,500 | \n",
" $37,399 | \n",
" $28,971 | \n",
" 1475 | \n",
" 55.05 | \n",
" 812 | \n",
" 293 | \n",
" 674 | \n",
"
\n",
" \n",
" | 7 | \n",
" 15 | \n",
" 9.00 | \n",
" Upper | \n",
" No | \n",
" 147.52 | \n",
" $51,500 | \n",
" $75,973 | \n",
" $58,854 | \n",
" 3705 | \n",
" 16.22 | \n",
" 601 | \n",
" 1214 | \n",
" 1606 | \n",
"
\n",
" \n",
" | 8 | \n",
" 15 | \n",
" 10.00 | \n",
" Upper | \n",
" No | \n",
" 142.34 | \n",
" $51,500 | \n",
" $73,305 | \n",
" $56,786 | \n",
" 5538 | \n",
" 15.98 | \n",
" 885 | \n",
" 1660 | \n",
" 2226 | \n",
"
\n",
" \n",
" | 9 | \n",
" 15 | \n",
" 11.00 | \n",
" Upper | \n",
" No | \n",
" 122.20 | \n",
" $51,500 | \n",
" $62,933 | \n",
" $48,750 | \n",
" 6096 | \n",
" 16.44 | \n",
" 1002 | \n",
" 1667 | \n",
" 2398 | \n",
"
\n",
" \n",
" | 10 | \n",
" 15 | \n",
" 12.00 | \n",
" Middle | \n",
" No | \n",
" 101.99 | \n",
" $51,500 | \n",
" $52,525 | \n",
" $40,688 | \n",
" 7545 | \n",
" 22.29 | \n",
" 1682 | \n",
" 2219 | \n",
" 3155 | \n",
"
\n",
" \n",
" | 11 | \n",
" 15 | \n",
" 13.00 | \n",
" Moderate | \n",
" No | \n",
" 71.07 | \n",
" $51,500 | \n",
" $36,601 | \n",
" $28,355 | \n",
" 2481 | \n",
" 4.51 | \n",
" 112 | \n",
" 763 | \n",
" 1187 | \n",
"
\n",
" \n",
" | 12 | \n",
" 15 | \n",
" 14.00 | \n",
" Middle | \n",
" No | \n",
" 92.76 | \n",
" $51,500 | \n",
" $47,771 | \n",
" $37,005 | \n",
" 3545 | \n",
" 23.30 | \n",
" 826 | \n",
" 1166 | \n",
" 1581 | \n",
"
\n",
" \n",
" | 13 | \n",
" 15 | \n",
" 15.00 | \n",
" Middle | \n",
" No | \n",
" 100.88 | \n",
" $51,500 | \n",
" $51,953 | \n",
" $40,247 | \n",
" 5471 | \n",
" 8.59 | \n",
" 470 | \n",
" 1839 | \n",
" 2311 | \n",
"
\n",
" \n",
" | 14 | \n",
" 15 | \n",
" 16.00 | \n",
" Middle | \n",
" No | \n",
" 92.93 | \n",
" $51,500 | \n",
" $47,859 | \n",
" $37,074 | \n",
" 3822 | \n",
" 11.09 | \n",
" 424 | \n",
" 1199 | \n",
" 1756 | \n",
"
\n",
" \n",
" | 15 | \n",
" 15 | \n",
" 17.00 | \n",
" Upper | \n",
" No | \n",
" 121.62 | \n",
" $51,500 | \n",
" $62,634 | \n",
" $48,520 | \n",
" 6530 | \n",
" 19.91 | \n",
" 1300 | \n",
" 2091 | \n",
" 2651 | \n",
"
\n",
" \n",
" | 16 | \n",
" 15 | \n",
" 18.00 | \n",
" Middle | \n",
" No | \n",
" 101.16 | \n",
" $51,500 | \n",
" $52,097 | \n",
" $40,359 | \n",
" 6653 | \n",
" 12.63 | \n",
" 840 | \n",
" 2087 | \n",
" 2894 | \n",
"
\n",
" \n",
" | 17 | \n",
" 15 | \n",
" 19.00 | \n",
" Middle | \n",
" No | \n",
" 97.13 | \n",
" $51,500 | \n",
" $50,022 | \n",
" $38,750 | \n",
" 19 | \n",
" 15.79 | \n",
" 3 | \n",
" 5 | \n",
" 446 | \n",
"
\n",
" \n",
" | 18 | \n",
" 15 | \n",
" 20.00 | \n",
" Middle | \n",
" No | \n",
" 101.02 | \n",
" $51,500 | \n",
" $52,025 | \n",
" $40,302 | \n",
" 5539 | \n",
" 6.32 | \n",
" 350 | \n",
" 1815 | \n",
" 2321 | \n",
"
\n",
" \n",
" | 19 | \n",
" 15 | \n",
" 21.01 | \n",
" Low | \n",
" No | \n",
" 42.57 | \n",
" $51,500 | \n",
" $21,924 | \n",
" $16,985 | \n",
" 2637 | \n",
" 30.94 | \n",
" 816 | \n",
" 199 | \n",
" 570 | \n",
"
\n",
" \n",
" | 20 | \n",
" 15 | \n",
" 21.02 | \n",
" Upper | \n",
" No | \n",
" 167.87 | \n",
" $51,500 | \n",
" $86,453 | \n",
" $66,970 | \n",
" 2651 | \n",
" 13.43 | \n",
" 356 | \n",
" 868 | \n",
" 1145 | \n",
"
\n",
" \n",
" | 21 | \n",
" 15 | \n",
" 21.03 | \n",
" Middle | \n",
" No | \n",
" 92.01 | \n",
" $51,500 | \n",
" $47,385 | \n",
" $36,708 | \n",
" 4569 | \n",
" 25.52 | \n",
" 1166 | \n",
" 947 | \n",
" 1759 | \n",
"
\n",
" \n",
" | 22 | \n",
" 15 | \n",
" 22.00 | \n",
" Middle | \n",
" No | \n",
" 94.29 | \n",
" $51,500 | \n",
" $48,559 | \n",
" $37,618 | \n",
" 3784 | \n",
" 4.97 | \n",
" 188 | \n",
" 1286 | \n",
" 1752 | \n",
"
\n",
" \n",
" | 23 | \n",
" 15 | \n",
" 23.00 | \n",
" Moderate | \n",
" No | \n",
" 75.40 | \n",
" $51,500 | \n",
" $38,831 | \n",
" $30,081 | \n",
" 4043 | \n",
" 15.38 | \n",
" 622 | \n",
" 1206 | \n",
" 1877 | \n",
"
\n",
" \n",
" | 24 | \n",
" 15 | \n",
" 24.00 | \n",
" Middle | \n",
" No | \n",
" 103.82 | \n",
" $51,500 | \n",
" $53,467 | \n",
" $41,417 | \n",
" 4357 | \n",
" 4.50 | \n",
" 196 | \n",
" 1447 | \n",
" 1807 | \n",
"
\n",
" \n",
" | 25 | \n",
" 15 | \n",
" 25.01 | \n",
" Middle | \n",
" No | \n",
" 114.30 | \n",
" $51,500 | \n",
" $58,865 | \n",
" $45,600 | \n",
" 6635 | \n",
" 9.25 | \n",
" 614 | \n",
" 2132 | \n",
" 2684 | \n",
"
\n",
" \n",
" | 26 | \n",
" 15 | \n",
" 25.02 | \n",
" Middle | \n",
" No | \n",
" 95.27 | \n",
" $51,500 | \n",
" $49,064 | \n",
" $38,008 | \n",
" 4395 | \n",
" 3.89 | \n",
" 171 | \n",
" 1462 | \n",
" 1844 | \n",
"
\n",
" \n",
" | 27 | \n",
" 15 | \n",
" 26.00 | \n",
" Middle | \n",
" No | \n",
" 100.75 | \n",
" $51,500 | \n",
" $51,886 | \n",
" $40,194 | \n",
" 4263 | \n",
" 7.08 | \n",
" 302 | \n",
" 1463 | \n",
" 1918 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" County Code Tract Code Tract Income Level \\\n",
"0 15 1.00 Moderate \n",
"1 15 2.00 Middle \n",
"2 15 3.00 Moderate \n",
"3 15 4.00 Moderate \n",
"4 15 5.00 Low \n",
"5 15 6.00 Low \n",
"6 15 8.00 Moderate \n",
"7 15 9.00 Upper \n",
"8 15 10.00 Upper \n",
"9 15 11.00 Upper \n",
"10 15 12.00 Middle \n",
"11 15 13.00 Moderate \n",
"12 15 14.00 Middle \n",
"13 15 15.00 Middle \n",
"14 15 16.00 Middle \n",
"15 15 17.00 Upper \n",
"16 15 18.00 Middle \n",
"17 15 19.00 Middle \n",
"18 15 20.00 Middle \n",
"19 15 21.01 Low \n",
"20 15 21.02 Upper \n",
"21 15 21.03 Middle \n",
"22 15 22.00 Middle \n",
"23 15 23.00 Moderate \n",
"24 15 24.00 Middle \n",
"25 15 25.01 Middle \n",
"26 15 25.02 Middle \n",
"27 15 26.00 Middle \n",
"\n",
" Distressed or Under -served Tract Tract Median Family Income % \\\n",
"0 No 74.23 \n",
"1 No 86.40 \n",
"2 No 59.27 \n",
"3 No 76.24 \n",
"4 No 40.54 \n",
"5 No 30.36 \n",
"6 No 72.62 \n",
"7 No 147.52 \n",
"8 No 142.34 \n",
"9 No 122.20 \n",
"10 No 101.99 \n",
"11 No 71.07 \n",
"12 No 92.76 \n",
"13 No 100.88 \n",
"14 No 92.93 \n",
"15 No 121.62 \n",
"16 No 101.16 \n",
"17 No 97.13 \n",
"18 No 101.02 \n",
"19 No 42.57 \n",
"20 No 167.87 \n",
"21 No 92.01 \n",
"22 No 94.29 \n",
"23 No 75.40 \n",
"24 No 103.82 \n",
"25 No 114.30 \n",
"26 No 95.27 \n",
"27 No 100.75 \n",
"\n",
" 2011 HUD Est. MSA/MD non-MSA/MD Median Family Income \\\n",
"0 $51,500 \n",
"1 $51,500 \n",
"2 $51,500 \n",
"3 $51,500 \n",
"4 $51,500 \n",
"5 $51,500 \n",
"6 $51,500 \n",
"7 $51,500 \n",
"8 $51,500 \n",
"9 $51,500 \n",
"10 $51,500 \n",
"11 $51,500 \n",
"12 $51,500 \n",
"13 $51,500 \n",
"14 $51,500 \n",
"15 $51,500 \n",
"16 $51,500 \n",
"17 $51,500 \n",
"18 $51,500 \n",
"19 $51,500 \n",
"20 $51,500 \n",
"21 $51,500 \n",
"22 $51,500 \n",
"23 $51,500 \n",
"24 $51,500 \n",
"25 $51,500 \n",
"26 $51,500 \n",
"27 $51,500 \n",
"\n",
" 2011 Est. Tract Median Family Income 2000 Tract Median Family Income \\\n",
"0 $38,228 $29,615 \n",
"1 $44,496 $34,468 \n",
"2 $30,524 $23,644 \n",
"3 $39,264 $30,417 \n",
"4 $20,878 $16,172 \n",
"5 $15,635 $12,113 \n",
"6 $37,399 $28,971 \n",
"7 $75,973 $58,854 \n",
"8 $73,305 $56,786 \n",
"9 $62,933 $48,750 \n",
"10 $52,525 $40,688 \n",
"11 $36,601 $28,355 \n",
"12 $47,771 $37,005 \n",
"13 $51,953 $40,247 \n",
"14 $47,859 $37,074 \n",
"15 $62,634 $48,520 \n",
"16 $52,097 $40,359 \n",
"17 $50,022 $38,750 \n",
"18 $52,025 $40,302 \n",
"19 $21,924 $16,985 \n",
"20 $86,453 $66,970 \n",
"21 $47,385 $36,708 \n",
"22 $48,559 $37,618 \n",
"23 $38,831 $30,081 \n",
"24 $53,467 $41,417 \n",
"25 $58,865 $45,600 \n",
"26 $49,064 $38,008 \n",
"27 $51,886 $40,194 \n",
"\n",
" Tract Population Tract Minority % Minority Population \\\n",
"0 2279 45.77 1043 \n",
"1 3084 42.44 1309 \n",
"2 3545 78.87 2796 \n",
"3 2777 59.42 1650 \n",
"4 2372 93.55 2219 \n",
"5 2439 77.74 1896 \n",
"6 1475 55.05 812 \n",
"7 3705 16.22 601 \n",
"8 5538 15.98 885 \n",
"9 6096 16.44 1002 \n",
"10 7545 22.29 1682 \n",
"11 2481 4.51 112 \n",
"12 3545 23.30 826 \n",
"13 5471 8.59 470 \n",
"14 3822 11.09 424 \n",
"15 6530 19.91 1300 \n",
"16 6653 12.63 840 \n",
"17 19 15.79 3 \n",
"18 5539 6.32 350 \n",
"19 2637 30.94 816 \n",
"20 2651 13.43 356 \n",
"21 4569 25.52 1166 \n",
"22 3784 4.97 188 \n",
"23 4043 15.38 622 \n",
"24 4357 4.50 196 \n",
"25 6635 9.25 614 \n",
"26 4395 3.89 171 \n",
"27 4263 7.08 302 \n",
"\n",
" Owner Occupied Units 1- to 4- Family Units \n",
"0 431 894 \n",
"1 881 1543 \n",
"2 637 1562 \n",
"3 908 1335 \n",
"4 597 1306 \n",
"5 363 736 \n",
"6 293 674 \n",
"7 1214 1606 \n",
"8 1660 2226 \n",
"9 1667 2398 \n",
"10 2219 3155 \n",
"11 763 1187 \n",
"12 1166 1581 \n",
"13 1839 2311 \n",
"14 1199 1756 \n",
"15 2091 2651 \n",
"16 2087 2894 \n",
"17 5 446 \n",
"18 1815 2321 \n",
"19 199 570 \n",
"20 868 1145 \n",
"21 947 1759 \n",
"22 1286 1752 \n",
"23 1206 1877 \n",
"24 1447 1807 \n",
"25 2132 2684 \n",
"26 1462 1844 \n",
"27 1463 1918 "
]
},
"execution_count": 195,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url = 'http://www.ffiec.gov/census/report.aspx?year=2011&state=01&report=demographic&msa=11500'\n",
"html = requests.get(url).content\n",
"df_list = pd.read_html(html)\n",
"df = df_list[-1] # Nos quedamos con la ultima tabla descargada\n",
"\n",
"df.to_csv('tabla descargada de internet.csv')\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "f67b6ea5",
"metadata": {},
"source": [
"Concatenación de tablas (iguales) descargadas desde internet..."
]
},
{
"cell_type": "code",
"execution_count": 209,
"id": "6928a549",
"metadata": {},
"outputs": [],
"source": [
"urls = [\"http://www.marca.com/estadisticas/futbol/primera/2016_17/jornada_38/\",\n",
" \"http://www.marca.com/estadisticas/futbol/primera/2017_18/jornada_38/\",\n",
" \"http://www.marca.com/estadisticas/futbol/primera/2018_19/jornada_38/\",\n",
" \"http://www.marca.com/estadisticas/futbol/primera/2019_20/jornada_38/\",\n",
" \"http://www.marca.com/estadisticas/futbol/primera/2020_21/jornada_38/\"]"
]
},
{
"cell_type": "code",
"execution_count": 231,
"id": "12bae9ec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n",
"1\n",
"2\n",
"3\n",
"4\n"
]
}
],
"source": [
"tablas = []\n",
"for a in range(len(urls)):\n",
" print(a)\n",
" html = requests.get(urls[a]).content\n",
" df_list = pd.read_html(html)\n",
" tablas.append(df_list[-1]) # Nos quedamos con la ultima tabla descargada\n"
]
},
{
"cell_type": "code",
"execution_count": 233,
"id": "39f600e7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Clasificación | \n",
" Clasificación.1 | \n",
" P.J. | \n",
" P.G. | \n",
" P.E. | \n",
" P.P. | \n",
" G.F. | \n",
" G.C. | \n",
" Ptos | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Barcelona | \n",
" 38 | \n",
" 26 | \n",
" 9 | \n",
" 3 | \n",
" 90 | \n",
" 36 | \n",
" 87 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Atlético | \n",
" 38 | \n",
" 22 | \n",
" 10 | \n",
" 6 | \n",
" 55 | \n",
" 29 | \n",
" 76 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" R. Madrid | \n",
" 38 | \n",
" 21 | \n",
" 5 | \n",
" 12 | \n",
" 63 | \n",
" 46 | \n",
" 68 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" Valencia | \n",
" 37 | \n",
" 15 | \n",
" 15 | \n",
" 7 | \n",
" 51 | \n",
" 35 | \n",
" 60 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Sevilla | \n",
" 38 | \n",
" 17 | \n",
" 8 | \n",
" 13 | \n",
" 62 | \n",
" 47 | \n",
" 59 | \n",
"
\n",
" \n",
" | 5 | \n",
" 6 | \n",
" Getafe | \n",
" 37 | \n",
" 15 | \n",
" 13 | \n",
" 9 | \n",
" 48 | \n",
" 35 | \n",
" 58 | \n",
"
\n",
" \n",
" | 6 | \n",
" 7 | \n",
" Espanyol | \n",
" 38 | \n",
" 14 | \n",
" 11 | \n",
" 13 | \n",
" 48 | \n",
" 50 | \n",
" 53 | \n",
"
\n",
" \n",
" | 7 | \n",
" 8 | \n",
" Athletic | \n",
" 38 | \n",
" 13 | \n",
" 14 | \n",
" 11 | \n",
" 41 | \n",
" 45 | \n",
" 53 | \n",
"
\n",
" \n",
" | 8 | \n",
" 9 | \n",
" R. Sociedad | \n",
" 38 | \n",
" 13 | \n",
" 11 | \n",
" 14 | \n",
" 45 | \n",
" 46 | \n",
" 50 | \n",
"
\n",
" \n",
" | 9 | \n",
" 10 | \n",
" Betis | \n",
" 38 | \n",
" 14 | \n",
" 8 | \n",
" 16 | \n",
" 44 | \n",
" 52 | \n",
" 50 | \n",
"
\n",
" \n",
" | 10 | \n",
" 11 | \n",
" Alavés | \n",
" 38 | \n",
" 13 | \n",
" 11 | \n",
" 14 | \n",
" 39 | \n",
" 50 | \n",
" 50 | \n",
"
\n",
" \n",
" | 11 | \n",
" 12 | \n",
" Eibar | \n",
" 38 | \n",
" 11 | \n",
" 14 | \n",
" 13 | \n",
" 46 | \n",
" 50 | \n",
" 47 | \n",
"
\n",
" \n",
" | 12 | \n",
" 13 | \n",
" Leganés | \n",
" 38 | \n",
" 11 | \n",
" 12 | \n",
" 15 | \n",
" 37 | \n",
" 43 | \n",
" 45 | \n",
"
\n",
" \n",
" | 13 | \n",
" 14 | \n",
" Villarreal | \n",
" 38 | \n",
" 10 | \n",
" 14 | \n",
" 14 | \n",
" 49 | \n",
" 52 | \n",
" 44 | \n",
"
\n",
" \n",
" | 14 | \n",
" 15 | \n",
" Levante | \n",
" 38 | \n",
" 11 | \n",
" 11 | \n",
" 16 | \n",
" 59 | \n",
" 66 | \n",
" 44 | \n",
"
\n",
" \n",
" | 15 | \n",
" 16 | \n",
" Valladolid | \n",
" 38 | \n",
" 10 | \n",
" 11 | \n",
" 17 | \n",
" 32 | \n",
" 51 | \n",
" 41 | \n",
"
\n",
" \n",
" | 16 | \n",
" 17 | \n",
" Celta | \n",
" 38 | \n",
" 10 | \n",
" 11 | \n",
" 17 | \n",
" 53 | \n",
" 62 | \n",
" 41 | \n",
"
\n",
" \n",
" | 17 | \n",
" 18 | \n",
" Girona | \n",
" 38 | \n",
" 9 | \n",
" 10 | \n",
" 19 | \n",
" 37 | \n",
" 53 | \n",
" 37 | \n",
"
\n",
" \n",
" | 18 | \n",
" 19 | \n",
" Huesca | \n",
" 37 | \n",
" 7 | \n",
" 11 | \n",
" 19 | \n",
" 43 | \n",
" 65 | \n",
" 32 | \n",
"
\n",
" \n",
" | 19 | \n",
" 20 | \n",
" Rayo | \n",
" 37 | \n",
" 8 | \n",
" 7 | \n",
" 22 | \n",
" 41 | \n",
" 70 | \n",
" 31 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Clasificación Clasificación.1 P.J. P.G. P.E. P.P. G.F. G.C. Ptos\n",
"0 1 Barcelona 38 26 9 3 90 36 87\n",
"1 2 Atlético 38 22 10 6 55 29 76\n",
"2 3 R. Madrid 38 21 5 12 63 46 68\n",
"3 4 Valencia 37 15 15 7 51 35 60\n",
"4 5 Sevilla 38 17 8 13 62 47 59\n",
"5 6 Getafe 37 15 13 9 48 35 58\n",
"6 7 Espanyol 38 14 11 13 48 50 53\n",
"7 8 Athletic 38 13 14 11 41 45 53\n",
"8 9 R. Sociedad 38 13 11 14 45 46 50\n",
"9 10 Betis 38 14 8 16 44 52 50\n",
"10 11 Alavés 38 13 11 14 39 50 50\n",
"11 12 Eibar 38 11 14 13 46 50 47\n",
"12 13 Leganés 38 11 12 15 37 43 45\n",
"13 14 Villarreal 38 10 14 14 49 52 44\n",
"14 15 Levante 38 11 11 16 59 66 44\n",
"15 16 Valladolid 38 10 11 17 32 51 41\n",
"16 17 Celta 38 10 11 17 53 62 41\n",
"17 18 Girona 38 9 10 19 37 53 37\n",
"18 19 Huesca 37 7 11 19 43 65 32\n",
"19 20 Rayo 37 8 7 22 41 70 31"
]
},
"execution_count": 233,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tablas[2]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}