diff --git a/ejercicios/beautifulsoup/ej4/data/recipes.bd b/ejercicios/beautifulsoup/ej4/data/recipes.bd
new file mode 100644
index 0000000..861aaa4
Binary files /dev/null and b/ejercicios/beautifulsoup/ej4/data/recipes.bd differ
diff --git a/ejercicios/beautifulsoup/ej4/src/__ssl.py b/ejercicios/beautifulsoup/ej4/src/__ssl.py
new file mode 100644
index 0000000..475f052
--- /dev/null
+++ b/ejercicios/beautifulsoup/ej4/src/__ssl.py
@@ -0,0 +1,19 @@
+def init_ssl():
+    """Disable HTTPS certificate verification for the stdlib HTTPS clients.
+
+    Points ``ssl._create_default_https_context`` at
+    ``ssl._create_unverified_context`` unless the ``PYTHONHTTPSVERIFY``
+    environment variable holds a non-empty value.
+
+    SECURITY: once this has run, HTTPS responses are no longer authenticated
+    and can be tampered with in transit. Export ``PYTHONHTTPSVERIFY`` with any
+    non-empty value to leave verification on.
+    """
+    import os
+    import ssl
+
+    if os.environ.get("PYTHONHTTPSVERIFY", ""):
+        return
+    unverified_context = getattr(ssl, "_create_unverified_context", None)
+    if unverified_context:
+        ssl._create_default_https_context = unverified_context
diff --git a/ejercicios/beautifulsoup/ej4/src/config.py b/ejercicios/beautifulsoup/ej4/src/config.py
new file mode 100644
index 0000000..c76e2b6
--- /dev/null
+++ b/ejercicios/beautifulsoup/ej4/src/config.py
@@ -0,0 +1,12 @@
+"""Shared settings for the recipe scraper: site URLs and local data paths."""
+
+from pathlib import Path
+
+# Root of the site; links found in the listing page are resolved against it.
+BASE_URL = "https://recetas.elperiodico.com"
+# Listing page the recipes are read from.
+RECIPES_URL = BASE_URL + "/Recetas-de-Aperitivos-tapas-listado_receta-1_1.html"
+# The data directory sits next to the directory that holds this module.
+DATA_DIR = Path(__file__).parent.parent / "data"
+# SQLite database file.
+DB_PATH = DATA_DIR / "recipes.bd"
diff --git a/ejercicios/beautifulsoup/ej4/src/db.py b/ejercicios/beautifulsoup/ej4/src/db.py
new file mode 100644
index 0000000..88bd2b4
--- /dev/null
+++ b/ejercicios/beautifulsoup/ej4/src/db.py
@@ -0,0 +1,144 @@
+"""Small SQLite helper used by the recipe scraper."""
+
+import sqlite3
+from pathlib import Path
+
+
+class DBAttr:
+    """Column definition used to build a ``CREATE TABLE`` statement."""
+
+    def __init__(self, name, type_, modifier=""):
+        self.name = name
+        self.type_ = type_
+        self.modifier = modifier
+
+    def sql(self):
+        """Return ``name type [modifier]`` as it appears in the column list."""
+        parts = [self.name, self.type_]
+        if self.modifier:
+            parts.append(self.modifier)
+        return " ".join(parts)
+
+
+class DBManager:
+    """Singleton wrapper around one SQLite connection.
+
+    Table and column names are interpolated into the SQL text, so they must
+    come from trusted code, never from user input; row values are always
+    bound as ``?`` parameters. SQLite errors are printed and replaced by a
+    neutral result (``None``, ``[]``, ``False`` or ``0``) instead of raised.
+    """
+
+    _instance = None
+    _is_open = False
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self, path):
+        path = Path(path)
+        # __init__ runs on every DBManager(...) call even though __new__ hands
+        # back the shared instance: reuse the connection already open for this
+        # path, and close the old one before switching to another database.
+        if self._is_open:
+            if path == self.path:
+                return
+            self.close()
+        self.path = path
+        self.conn = sqlite3.connect(self.path)
+        self.conn.row_factory = sqlite3.Row
+        self._is_open = True
+
+    def _write(self, query, params, error_label):
+        """Run a modifying statement in a transaction; print SQLite errors."""
+        try:
+            # Used as a context manager the connection commits on success
+            # and rolls back when the statement raises.
+            with self.conn:
+                self.conn.execute(query, params)
+        except sqlite3.Error as e:
+            print(error_label, e)
+
+    def create_table(self, table_name, attributes: list[DBAttr]):
+        """Create ``table_name`` with ``attributes`` unless it already exists."""
+        columns_sql = ",\n    ".join(attr.sql() for attr in attributes)
+        query = f"CREATE TABLE IF NOT EXISTS {table_name} (\n    {columns_sql}\n);"
+        self._write(query, (), "Error creating table:")
+
+    def get_all(self, table_name):
+        """Return every row of ``table_name`` as a list of dicts."""
+        try:
+            cursor = self.conn.execute(f"SELECT * FROM {table_name};")
+            return [dict(row) for row in cursor.fetchall()]
+        except sqlite3.Error as e:
+            print("Error selecting:", e)
+            return []
+
+    def get_singleton(self, singleton_table):
+        """Return the first column of every row of ``singleton_table``."""
+        try:
+            cursor = self.conn.execute(f"SELECT * FROM {singleton_table}")
+            return [row[0] for row in cursor.fetchall()]
+        except sqlite3.Error as e:
+            print("Error selecting:", e)
+            return []
+
+    def get_by(self, table_name, column, value):
+        """Return the rows whose ``column`` equals ``value`` as dicts."""
+        try:
+            query = f"SELECT * FROM {table_name} WHERE {column} = ?;"
+            cursor = self.conn.execute(query, (value,))
+            return [dict(row) for row in cursor.fetchall()]
+        except sqlite3.Error as e:
+            print("Error selecting:", e)
+            return []
+
+    def insert(self, table_name, data: dict):
+        """Insert one row; ``data`` maps column names to their values."""
+        keys = ", ".join(data)
+        placeholders = ", ".join("?" for _ in data)
+        query = f"INSERT INTO {table_name} ({keys}) VALUES ({placeholders});"
+        self._write(query, tuple(data.values()), "Error inserting:")
+
+    def update(self, table_name, data: dict, where_column, where_value):
+        """Set the ``data`` columns on rows where ``where_column`` matches."""
+        set_clause = ", ".join(f"{key} = ?" for key in data)
+        query = (
+            f"UPDATE {table_name} SET {set_clause} WHERE {where_column} = ?;"
+        )
+        self._write(query, (*data.values(), where_value), "Error updating:")
+
+    def delete(self, table_name, where_column, where_value):
+        """Delete the rows where ``where_column`` equals ``where_value``."""
+        query = f"DELETE FROM {table_name} WHERE {where_column} = ?;"
+        self._write(query, (where_value,), "Error deleting:")
+
+    def clear(self, table_name):
+        """Delete every row of ``table_name``."""
+        self._write(f"DELETE FROM {table_name};", (), "Error clearing table: ")
+
+    def exists(self, table_name, where_column, where_value):
+        """Return True if a row has ``where_column`` equal to ``where_value``."""
+        query = f"SELECT 1 FROM {table_name} WHERE {where_column} = ? LIMIT 1;"
+        try:
+            cursor = self.conn.execute(query, (where_value,))
+            return cursor.fetchone() is not None
+        except sqlite3.Error as e:
+            print("Error checking existence:", e)
+            return False
+
+    def count(self, table_name):
+        """Return the number of rows in ``table_name`` (0 on error)."""
+        try:
+            cursor = self.conn.execute(f"SELECT COUNT(*) as total FROM {table_name};")
+            return cursor.fetchone()["total"]
+        except sqlite3.Error as e:
+            print("Error counting:", e)
+            return 0
+
+    def close(self):
+        """Close the connection; the next ``DBManager(path)`` call reopens it."""
+        self.conn.close()
+        self._is_open = False
diff --git a/ejercicios/beautifulsoup/ej4/src/main.py b/ejercicios/beautifulsoup/ej4/src/main.py
new file mode 100644
index 0000000..b9f10f4
--- /dev/null
+++ b/ejercicios/beautifulsoup/ej4/src/main.py
@@ -0,0 +1,143 @@
+"""Scrape tapas recipes from recetas.elperiodico.com into SQLite."""
+
+import locale
+import re
+import urllib.request
+from datetime import datetime
+from tkinter import Tk
+from tkinter import messagebox
+from urllib.parse import urljoin
+
+from bs4 import BeautifulSoup
+
+from __ssl import init_ssl
+from config import BASE_URL, DB_PATH, RECIPES_URL
+from db import DBAttr, DBManager
+#from ui import RecipesUI
+
+init_ssl()
+# strptime's %B is locale dependent; LC_TIME is switched to Spanish so that it
+# matches Spanish month names.
+locale.setlocale(locale.LC_TIME, "es_ES.UTF-8")
+
+dbm = DBManager(DB_PATH)
+
+
+def create_tables():
+    """Create the ``recipes`` table if it does not exist yet."""
+    recipes_attr = [
+        DBAttr("title", "TEXT", "NOT NULL"),
+        DBAttr("difficulty", "TEXT", "DEFAULT NULL"),
+        DBAttr("units", "INTEGER", "DEFAULT NULL"),
+        DBAttr("duration", "INTEGER", "DEFAULT NULL"),
+        DBAttr("author", "TEXT", "NOT NULL"),
+        DBAttr("updated_at", "DATE", "NOT NULL"),
+    ]
+    dbm.create_table("recipes", recipes_attr)
+
+
+def _text(tag):
+    """Return the stripped text of ``tag``, or None when ``tag`` is None."""
+    return tag.get_text(strip=True) if tag is not None else None
+
+
+def _fetch_soup(url):
+    """Download ``url`` and parse it, closing the HTTP response afterwards."""
+    with urllib.request.urlopen(url) as response:
+        return BeautifulSoup(response, "lxml")
+
+
+def _scrape_details(url):
+    """Return ``(author, updated_at)`` read from the recipe page at ``url``.
+
+    ``updated_at`` is an ISO ``YYYY-MM-DD`` string. The result is None when
+    the author block is missing or its date does not match ``%d %B %Y``.
+    """
+    author_div = _fetch_soup(url).find("div", class_="autor")
+    if author_div is None:
+        return None
+    details = author_div.find("div", class_="nombre_autor")
+    if details is None:
+        return None
+    author = _text(details.find("a"))
+    date_str = _text(details.find("span"))
+    if not author or not date_str:
+        return None
+    try:
+        updated_at = datetime.strptime(date_str, "%d %B %Y").date()
+    except ValueError:
+        return None
+    # Stored as ISO text: sqlite3's implicit datetime adapter is deprecated
+    # since Python 3.12.
+    return author, updated_at.isoformat()
+
+
+def persist_recipes():
+    """Scrape the listing page and insert one row per recipe.
+
+    Rows are appended, so scraping twice stores every recipe twice. Results
+    without a title link, an author or a parsable date are skipped.
+
+    Returns:
+        The number of rows in ``recipes`` after inserting.
+    """
+    listing = _fetch_soup(RECIPES_URL)
+    for div in listing.find_all("div", attrs={"data-js-selector": "resultado"}):
+        title_a = div.a
+        href = title_a.get("href") if title_a is not None else None
+        title = _text(title_a)
+        if not href or not title:
+            continue
+
+        info_div = div.find("div", class_="info_snippet")
+        difficulty = _text(info_div.find("span")) if info_div is not None else None
+
+        # A list passed to class_ matches a span carrying ANY of the classes,
+        # so ["property", "duracion"] and ["property", "unidades"] could both
+        # return the same span. Search by the distinguishing class instead.
+        properties = div.find("div", class_="properties")
+        duration = units = None
+        if properties is not None:
+            duration = _text(properties.find("span", class_="duracion"))
+            units = _text(properties.find("span", class_="unidades"))
+
+        # urljoin keeps absolute links as they are and resolves relative ones.
+        details = _scrape_details(urljoin(BASE_URL, href))
+        if details is None:
+            print("Skipping recipe without author or date:", title)
+            continue
+        author, updated_at = details
+
+        dbm.insert("recipes", {
+            "title": title,
+            "difficulty": difficulty,
+            "units": units,
+            "duration": duration,
+            "author": author,
+            "updated_at": updated_at,
+        })
+
+    return dbm.count("recipes")
+
+
+def main():
+    """Create the schema, scrape the recipes and print what was stored."""
+    try:
+        create_tables()
+        recipes_count = persist_recipes()
+        print(recipes_count)
+        #root = Tk()
+        #ui = RecipesUI(root)
+
+        # def handle_action(action):
+
+        #ui.callback = handle_action
+        #root.mainloop()
+
+        print(dbm.get_all("recipes"))
+    finally:
+        dbm.close()
+
+
+if __name__ == "__main__":
+    main()