Add: exercise 1 BeautifulSoup

This commit is contained in:
Jose
2026-02-16 02:40:41 +01:00
parent 2dbbf387d6
commit 65b2322022
8 changed files with 106 additions and 30 deletions

3
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"python.REPL.enableREPLSmartSend": false
}

Binary file not shown.

View File

@@ -1,6 +1,6 @@
from pathlib import Path from pathlib import Path
URL = "https://www.vinissimus.com/es/vinos/tinto/?cursor=0" URL = "https://www.vinissimus.com/es/vinos/tinto/?cursor="
DATA_DIR = Path(__file__).parent.parent / "data" DATA_DIR = Path(__file__).parent.parent / "data"
CSV_PATH = DATA_DIR / "books.csv" CSV_PATH = DATA_DIR / "books.csv"
DB_PATH = DATA_DIR / "books.bd" DB_PATH = DATA_DIR / "books.bd"

View File

@@ -51,6 +51,14 @@ class DBManager:
print("Error selecting:", e) print("Error selecting:", e)
return [] return []
def get_singleton(self, singleton_table):
    """Return the first column of every row in ``singleton_table``.

    Args:
        singleton_table: name of the table to read.

    Returns:
        A list with the first value of each row, or an empty list when
        the query fails (the error is printed to stdout).
    """
    try:
        first_values = []
        # Iterating the cursor yields the same rows fetchall() would return.
        for record in self.conn.execute(f"SELECT * FROM {singleton_table}"):
            first_values.append(record[0])
        return first_values
    except Exception as e:
        print("Error selecting:", e)
        return []
def get_by(self, table_name, column, value): def get_by(self, table_name, column, value):
try: try:
query = f"SELECT * FROM {table_name} WHERE {column} = ?;" query = f"SELECT * FROM {table_name} WHERE {column} = ?;"
@@ -102,6 +110,15 @@ class DBManager:
except Exception as e: except Exception as e:
print("Error deleting:", e) print("Error deleting:", e)
def clear(self, table_name):
    """Delete every row from ``table_name``.

    The statement runs inside the connection's context manager. Any
    exception is printed to stdout instead of being propagated.

    Args:
        table_name: name of the table to empty.
    """
    statement = "DELETE FROM {};".format(table_name)
    try:
        with self.conn:
            self.conn.execute(statement)
    except Exception as e:
        print("Error clearing table: ", e)
def exists(self, table_name, where_column, where_value): def exists(self, table_name, where_column, where_value):
query = f"SELECT 1 FROM {table_name} WHERE {where_column} = ? LIMIT 1;" query = f"SELECT 1 FROM {table_name} WHERE {where_column} = ? LIMIT 1;"

View File

@@ -1,16 +1,17 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re
from tkinter import Tk
from tkinter import messagebox
import urllib.request
from db import DBManager, DBAttr from db import DBManager, DBAttr
from ui import WinesUI from ui import WinesUI
from req import Requester
from __ssl import init_ssl from __ssl import init_ssl
from config import * from config import *
init_ssl() init_ssl()
dbm = DBManager(DB_PATH) dbm = DBManager(DB_PATH)
req = Requester()
def create_tables(): def create_tables():
wines_attr = [ wines_attr = [
@@ -28,8 +29,82 @@ def create_tables():
dbm.create_table("wines", wines_attr) dbm.create_table("wines", wines_attr)
dbm.create_table("types", types_attr) dbm.create_table("types", types_attr)
def extract_wines(pages=3, page_size=36):
    """Download the listing pages and collect every product element.

    Each page is requested at ``URL`` followed by a cursor offset of
    ``page_index * page_size``.

    Args:
        pages: number of listing pages to fetch (defaults to 3).
        page_size: cursor step between consecutive pages (defaults to 36).

    Returns:
        A list with the ``div.product-list-item`` elements of all pages,
        in page order.
    """
    items = []
    for page_index in range(pages):
        page_url = URL + str(page_index * page_size)
        # Close the HTTP response as soon as the document has been parsed
        # instead of leaving the connection open.
        with urllib.request.urlopen(page_url) as response:
            doc = BeautifulSoup(response, "lxml")
        items.extend(doc.find_all("div", class_="product-list-item"))
    return items
def persist_wines(wines):
    """Insert the scraped wines and their grape names into the database.

    For every product element the name, price, region, cellar and grape
    tags are read; when a ``p.dto`` sibling follows the price element its
    first string replaces the price. Grape names are split on ``/`` and
    each distinct name is inserted into ``types`` after all wines.

    Args:
        wines: iterable of product elements as returned by ``extract_wines``.

    Returns:
        Tuple ``(dbm.count("wines"), dbm.count("types"))``.
    """
    grape_names = set()
    for product in wines:
        info = product.find("div", class_=["details"])
        name = info.a.h2.string.strip()

        price_tag = product.find("p", class_=["price"])
        price = list(price_tag.stripped_strings)[0]

        origin = info.find("div", class_=["region"]).string.strip()
        cellar = info.find("div", class_=["cellar-name"]).string.strip()

        grapes = "".join(info.find("div", class_=["tags"]).stripped_strings)
        grape_names.update(piece.strip() for piece in grapes.split("/"))

        # A "dto" paragraph after the price takes precedence over it.
        discount_tag = price_tag.find_next_sibling("p", class_="dto")
        if discount_tag:
            price = list(discount_tag.stripped_strings)[0]

        record = {
            "name": name,
            "price": float(price.replace(',', '.')),
            "origin": origin,
            "cellar": cellar,
            "type": grapes,
        }
        dbm.insert("wines", record)

    for grape_name in grape_names:
        dbm.insert("types", {"type": grape_name})

    return dbm.count("wines"), dbm.count("types")
def main(): def main():
pass create_tables()
root = Tk()
ui = WinesUI(root)
def handle_action(action):
match(action):
case "cargar":
resp = messagebox.askyesno(title="Cargar", message="Quieres cargar todos los datos de nuevo?")
if resp:
dbm.clear("wines")
dbm.clear("types")
wines = extract_wines()
wines_count, types_count = persist_wines(wines)
ui.info(f"Hay {wines_count} vinos y {types_count} uvas.")
case "listar":
wines = dbm.get_all("wines")
ui.show_list(wines, ["name", "price", "origin", "cellar", "type"])
case "buscar_denominacion":
origins = list({wine["origin"] for wine in dbm.get_all("wines")})
origins.sort()
def search_origin(origin):
wines = [wine for wine in dbm.get_all("wines") if wine["origin"] == origin]
ui.show_list(wines, ["name", "price", "origin", "cellar", "type"])
ui.ask_spinbox("Buscar por denominación: ", origins, search_origin)
case "buscar_precio":
def search_price(price):
wines = [wine for wine in dbm.get_all("wines") if float(wine["price"]) <= float(price)]
wines.sort(key=lambda w: float(w["price"]))
ui.show_list(wines, ["name", "price", "origin", "cellar", "type"])
ui.ask_text("Selecciona precio: ", search_price)
case "buscar_uva":
types = [t for t in dbm.get_singleton("types")]
types.sort()
def search_type(type):
wines = [wine for wine in dbm.get_all("wines") if type in wine["type"]]
ui.show_list(wines, ["name", "price", "origin", "cellar", "type"])
ui.ask_spinbox("Selecciona tip de uva: ", types, search_type)
ui.callback = handle_action
root.mainloop()
dbm.close()
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -1,10 +0,0 @@
from urllib.request import urlopen, Request
class Requester():
    """Minimal HTTP helper that sends requests with a fixed User-Agent."""

    def __init__(self):
        # Headers attached to every request issued through get().
        self.headers = {
            "User-Agent": "Mozilla/5.0 (compatible; Konqueror/3.5.8; Linux)"
        }

    def get(self, url):
        """Open ``url`` with the configured headers.

        Args:
            url: address to request.

        Returns:
            The response object returned by ``urlopen``.
        """
        # Request's second positional parameter is ``data`` (the request
        # body); the headers must be passed by keyword or they are sent as
        # a body and the User-Agent is never applied.
        return urlopen(Request(url, headers=self.headers))

View File

@@ -15,26 +15,22 @@ class WinesUI():
# Menu Datos # Menu Datos
datos_menu = tk.Menu(self.menu, tearoff=0) datos_menu = tk.Menu(self.menu, tearoff=0)
datos_menu.add_command(label="Cargar", command=lambda: self.callback("cargar")) datos_menu.add_command(label="Cargar", command=lambda: self.callback("cargar"))
datos_menu.add_command(label="Listar", command=lambda: self.callback("listar"))
datos_menu.add_separator() datos_menu.add_separator()
datos_menu.add_command(label="Salir", command=self.root.quit) datos_menu.add_command(label="Salir", command=self.root.quit)
self.menu.add_cascade(label="Datos", menu=datos_menu) self.menu.add_cascade(label="Datos", menu=datos_menu)
# Menu Listar
listar_menu = tk.Menu(self.menu, tearoff=0)
listar_menu.add_command(label="Completo", command=lambda: self.callback("listar_completo"))
listar_menu.add_command(label="Ordenado", command=lambda: self.callback("listar_ordenado"))
self.menu.add_cascade(label="Listar", menu=listar_menu)
# Menu Buscar # Menu Buscar
buscar_menu = tk.Menu(self.menu, tearoff=0) buscar_menu = tk.Menu(self.menu, tearoff=0)
buscar_menu.add_command(label="Título", command=lambda: self.callback("buscar_titulo")) buscar_menu.add_command(label="Denominación", command=lambda: self.callback("buscar_denominacion"))
buscar_menu.add_command(label="Editorial", command=lambda: self.callback("buscar_editorial")) buscar_menu.add_command(label="Precio", command=lambda: self.callback("buscar_precio"))
buscar_menu.add_command(label="Uva", command=lambda: self.callback("buscar_uva"))
self.menu.add_cascade(label="Buscar", menu=buscar_menu) self.menu.add_cascade(label="Buscar", menu=buscar_menu)
# Callback externo desde el punto de entrada # Callback externo desde el punto de entrada
self.callback = None self.callback = None
def show_list(self, books, fields, title="Listado"): def show_list(self, items, fields, title="Listado"):
mw = tk.Toplevel(self.root) mw = tk.Toplevel(self.root)
mw.title(title) mw.title(title)
listbox = tk.Listbox(mw, width=80, height=20) listbox = tk.Listbox(mw, width=80, height=20)
@@ -44,8 +40,8 @@ class WinesUI():
listbox.config(yscrollcommand=scrollbar.set) listbox.config(yscrollcommand=scrollbar.set)
scrollbar.config(command=listbox.yview) scrollbar.config(command=listbox.yview)
for book in books: for item in items:
row = " | ".join(str(book[field]) for field in fields) row = " | ".join(str(item[field]) for field in fields)
listbox.insert("end", row) listbox.insert("end", row)
def ask_text(self, label, callback): def ask_text(self, label, callback):

View File

@@ -28,11 +28,6 @@ def main():
root = Tk() root = Tk()
ui = BooksUI(root) ui = BooksUI(root)
books = fr.read(CSV_PATH)
for book in books:
print(book)
def handle_action(action): def handle_action(action):
match(action): match(action):
case "cargar": case "cargar":