247 lines
10 KiB
Python
247 lines
10 KiB
Python
import locale
|
|
import re
|
|
import urllib.request
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
import tkinter as tk
|
|
from tkinter import messagebox, ttk
|
|
from tkinter import Tk
|
|
from tkinter.scrolledtext import ScrolledText
|
|
import shutil, re, os
|
|
|
|
from bs4 import BeautifulSoup
|
|
from whoosh.index import create_in,open_dir
|
|
from whoosh.fields import Schema, TEXT, DATETIME, KEYWORD, ID, NUMERIC
|
|
from whoosh.qparser import QueryParser
|
|
from whoosh import index, qparser, query
|
|
|
|
# Root of the recipe site that is scraped.
BASE_URL = "https://recetas.elperiodico.com"
# Listing page whose result entries are indexed.
RECIPES_URL = f"{BASE_URL}/Recetas-de-Aperitivos-tapas-listado_receta-1_1.html"
# Directory, next to this file, that holds the Whoosh index.
DATA_DIR = Path(__file__).parent.joinpath("index")
|
|
|
|
def init_ssl():
    """Make the default HTTPS context an unverified one, when permitted.

    The override is applied only if the ``PYTHONHTTPSVERIFY`` environment
    variable is unset or empty and the running ``ssl`` module exposes
    ``_create_unverified_context``. Otherwise nothing is changed.
    """
    import os
    import ssl

    # A non-empty PYTHONHTTPSVERIFY means the user decided explicitly.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return

    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        ssl._create_default_https_context = unverified_factory
|
|
|
|
def create_index():
    """Create the on-disk "RecipesIndex" Whoosh index if it is missing.

    Makes sure ``DATA_DIR`` exists, then creates the index with the recipe
    schema unless an index with that name is already present there. A short
    status line is printed in either case.
    """
    # A single mkdir(exist_ok=True) replaces the exists()/mkdir() pair, so a
    # directory created in between, or a missing parent, no longer raises.
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    if index.exists_in(DATA_DIR, indexname="RecipesIndex"):
        print("An index already exists")
        return

    schema = Schema(
        title=TEXT(stored=True),
        difficulty=TEXT(stored=True),
        duration=TEXT(stored=True),
        units=NUMERIC(stored=True, numtype=int),
        author=ID(stored=True),
        updated_at=DATETIME(stored=True),
        features=KEYWORD(stored=True, commas=True),
        intro=TEXT(stored=True),
    )
    idx = create_in(DATA_DIR, schema=schema, indexname="RecipesIndex")
    print(f"Created index: {idx.indexname}")
|
|
|
|
def parse_duration(duration):
    """Convert a duration string such as "1h 30m" into total minutes.

    Returns ``None`` for a falsy input. The text is stripped and lower-cased;
    the first ``<digits>h`` and the first ``<digits>m`` found are combined.
    A unit that is absent contributes 0.
    """
    if not duration:
        return None

    text = duration.strip().lower()

    total = 0
    # (pattern, minutes per unit) for each recognised unit.
    for pattern, minutes_per_unit in ((r"(\d+)h", 60), (r"(\d+)m", 1)):
        found = re.search(pattern, text)
        if found:
            total += int(found.group(1)) * minutes_per_unit

    return total
|
|
|
|
def parse_duration_inverse(minutes):
    """Format a number of minutes as "<h>h <m>m", or "<m>m" when hours are 0.

    Returns ``None`` when ``minutes`` is ``None``.
    """
    if minutes is None:
        return None

    remainder = minutes % 60
    hours = (minutes - remainder) // 60

    if hours != 0:
        return f"{hours}h {remainder}m"
    return f"{remainder}m"
|
|
|
|
def _fetch_soup(url):
    """Download ``url`` and return it parsed with BeautifulSoup's lxml parser."""
    # The context manager closes the HTTP response once parsing has read it.
    with urllib.request.urlopen(url) as response:
        return BeautifulSoup(response, "lxml")


def persist_recipes():
    """Scrape the recipe listing and add every recipe to "RecipesIndex".

    For each result on ``RECIPES_URL`` the listing entry supplies title,
    difficulty, intro, duration and units; the linked detail page supplies
    author, update date and features. Missing difficulty, duration or
    features are stored as "Unknown"; missing units as -1.

    Returns:
        The number of recipes added to the index.

    Raises:
        Any error raised while downloading or parsing propagates to the
        caller; in that case nothing from this run is committed.
    """
    idx = index.open_dir(DATA_DIR, "RecipesIndex")

    # Fetch the listing before opening the writer so that a network failure
    # here never takes the index write lock.
    listing = _fetch_soup(RECIPES_URL)
    results = listing.find_all("div", attrs={"data-js-selector": "resultado"})

    count = 0
    # Used as a context manager, the writer commits on success and cancels on
    # error, so a failed scrape does not leave the index locked.
    # NOTE(review): documents are appended to whatever the index already
    # holds, so loading twice stores every recipe twice; confirm whether the
    # index should be cleared first.
    with idx.writer() as writer:
        for div in results:
            title_a = div.a
            title = title_a.string.strip()

            info_div = div.find("div", class_="info_snippet")
            difficulty_span = info_div.find("span") if info_div else None
            difficulty = difficulty_span.get_text(strip=True) if difficulty_span else "Unknown"

            intro = div.find("div", class_="intro").get_text()

            properties = div.find("div", class_="properties")
            duration_span = properties.find("span", class_="duracion") if properties else None
            duration = duration_span.string.strip() if duration_span else "Unknown"
            units_span = properties.find("span", class_="unidades") if properties else None
            units = int(units_span.string.strip()) if units_span else -1

            # Author, date and features are only available on the detail page.
            detail_page = _fetch_soup(title_a["href"])
            details = detail_page.find("div", class_="autor").find("div", class_="nombre_autor")
            author = details.find("a").string
            date_str = details.find("span").string.replace("Actualizado: ", "")
            # "%B" (full month name) is parsed according to the active LC_TIME.
            updated_at = datetime.strptime(date_str, "%d %B %Y")

            features_div = detail_page.find("div", class_=["properties", "inline"])
            if features_div:
                features = features_div.get_text(strip=True).replace("Características adicionales:", "")
            else:
                features = "Unknown"

            writer.add_document(
                title=title,
                difficulty=difficulty,
                duration=duration,
                units=units,
                author=author,
                updated_at=updated_at,
                features=features,
                intro=intro,
            )
            count += 1

    return count
|
|
|
|
class RecipesUI():
    """Tkinter front end: a menu bar plus small helper dialogs.

    Menu entries forward an action name (for example "load") to
    ``self.callback``, which is ``None`` until the application assigns a
    handler after construction.
    """

    def __init__(self, root, title = "AII"):
        """Configure ``root`` (title, size) and build the "Datos"/"Buscar" menus."""
        self.root = root
        self.root.title(title)
        self.root.geometry("900x600")

        # External handler, assigned by the entry point after construction.
        self.callback = None

        # Main menu bar
        self.menu = tk.Menu(self.root)
        self.root.config(menu=self.menu)

        # "Datos" menu
        datos_menu = tk.Menu(self.menu, tearoff=0)
        datos_menu.add_command(label="Cargar", command=lambda: self._dispatch("load"))
        datos_menu.add_command(label="Listar", command=lambda: self._dispatch("list_recipes"))
        datos_menu.add_separator()
        datos_menu.add_command(label="Salir", command=self.root.quit)
        self.menu.add_cascade(label="Datos", menu=datos_menu)

        # "Buscar" menu
        buscar_menu = tk.Menu(self.menu, tearoff=0)
        buscar_menu.add_command(label="Título o Introducción", command=lambda: self._dispatch("search_title_or_intro"))
        buscar_menu.add_command(label="Fecha", command=lambda: self._dispatch("search_updated_at"))
        buscar_menu.add_command(label="Características y Título", command=lambda: self._dispatch("search_features_and_title"))
        self.menu.add_cascade(label="Buscar", menu=buscar_menu)

    def _dispatch(self, action):
        """Forward a menu action to the registered callback, if there is one."""
        # The handler is looked up at click time, so one assigned after
        # __init__ is used; without a handler the click is ignored instead of
        # trying to call None.
        if self.callback is not None:
            self.callback(action)

    @staticmethod
    def _add_accept_button(window, read_value, callback):
        """Add the "Aceptar" button that submits a value and closes ``window``."""
        def submit():
            # Read the value while the widgets still exist, hand it to the
            # callback, then close the dialog.
            callback(read_value())
            window.destroy()

        ttk.Button(window, text="Aceptar", command=submit).pack(pady=10)

    def show_list(self, items, fields, title="Listado"):
        """Open a window listing ``items``, one " | "-joined row per item.

        Each item is read with ``item.get(field, "Unknown")`` for every name
        in ``fields``.
        """
        mw = tk.Toplevel(self.root)
        mw.title(title)
        listbox = tk.Listbox(mw, width=80, height=20)
        listbox.pack(side="left", fill="both", expand=True)
        scrollbar = tk.Scrollbar(mw)
        scrollbar.pack(side="right", fill="y")
        # Link the listbox and the scrollbar in both directions.
        listbox.config(yscrollcommand=scrollbar.set)
        scrollbar.config(command=listbox.yview)

        for item in items:
            row = " | ".join(str(item.get(field, "Unknown")) for field in fields)
            listbox.insert("end", row)

    def ask_text(self, label, callback):
        """Ask for free text; "Aceptar" calls ``callback`` with the entry text."""
        mw = tk.Toplevel(self.root)
        mw.title(label)
        tk.Label(mw, text=label).pack(pady=5)
        entry = ttk.Entry(mw)
        entry.pack(pady=5)
        self._add_accept_button(mw, entry.get, callback)

    def ask_spinbox(self, label, options, callback):
        """Ask for one of ``options`` via a read-only spinbox.

        "Aceptar" calls ``callback`` with the value currently shown.
        """
        mw = tk.Toplevel(self.root)
        mw.title(label)
        tk.Label(mw, text=label).pack(pady=5)
        spinbox = ttk.Spinbox(mw, values=options, width=40)
        # Pre-select the first option before locking the widget, so accepting
        # without touching the arrows submits a real option, not "".
        if options:
            spinbox.set(options[0])
        spinbox.configure(state="readonly")
        spinbox.pack(pady=5)
        self._add_accept_button(mw, spinbox.get, callback)

    def ask_radiobutton(self, label, options, callback):
        """Ask for one of ``options`` via radio buttons (first one preselected).

        "Aceptar" calls ``callback`` with the selected option.
        """
        mw = tk.Toplevel(self.root)
        mw.title(label)
        tk.Label(mw, text=label).pack(pady=5)
        # An empty option list yields an empty selection instead of IndexError.
        sv = tk.StringVar(value=options[0] if options else "")
        for option in options:
            tk.Radiobutton(mw, text=option, variable=sv, value=option).pack(anchor="w")
        self._add_accept_button(mw, sv.get, callback)

    def info(self, message):
        """Show ``message`` in an information message box."""
        messagebox.showinfo("Información", message)
|
|
|
|
def main():
|
|
init_ssl()
|
|
locale.setlocale(locale.LC_TIME, "es_ES.UTF-8")
|
|
|
|
create_index()
|
|
root = Tk()
|
|
ui = RecipesUI(root)
|
|
|
|
def handle_action(action):
|
|
match(action):
|
|
case "load":
|
|
resp = messagebox.askyesno(title="Cargar", message="Quieres cargar todos los datos de nuevo?")
|
|
if resp:
|
|
recipes_count = persist_recipes()
|
|
ui.info(f"Se han indexado {recipes_count} recetas")
|
|
case "list_recipes":
|
|
ix = open_dir(DATA_DIR, "RecipesIndex")
|
|
with ix.searcher() as searcher:
|
|
recipes = searcher.search(query.Every(), limit=None)
|
|
clear = []
|
|
for r in recipes:
|
|
d = dict(r)
|
|
clear.append(d)
|
|
print(clear)
|
|
ui.show_list(clear, ["title", "difficulty", "units", "duration"])
|
|
# case "search_title_or_intro":
|
|
# def search_author(author):
|
|
# recipes = [recipe for recipe in dbm.get_all("recipes") if author.lower() in recipe["author"].lower()]
|
|
# for r in recipes:
|
|
# r["units"] = str(r["units"]) + " personas" if r["units"] is not None else "Unknown personas"
|
|
# r["duration"] = parse_duration_inverse(r["duration"])
|
|
# ui.show_list(recipes, ["title", "difficulty", "units", "duration", "author"])
|
|
# ui.ask_text("Buscar por autor: ", search_author)
|
|
# case "search_updated_at":
|
|
# def search_date(date):
|
|
# d = datetime.strptime(date, "%d/%m/%Y")
|
|
# recipes = [recipe for recipe in dbm.get_all("recipes")
|
|
# if d > datetime.strptime(recipe["updated_at"], "%Y-%m-%d %H:%M:%S")]
|
|
# for r in recipes:
|
|
# r["units"] = str(r["units"]) + " personas" if r["units"] is not None else "Unknown personas"
|
|
# r["duration"] = parse_duration_inverse(r["duration"])
|
|
# ui.show_list(recipes, ["title", "difficulty", "units", "duration", "updated_at"])
|
|
# ui.ask_text("Buscar por fecha: ", search_date)
|
|
# case "search_features_and_title":
|
|
# def search_author(author):
|
|
# recipes = [recipe for recipe in dbm.get_all("recipes") if author.lower() in recipe["author"].lower()]
|
|
# for r in recipes:
|
|
# r["units"] = str(r["units"]) + " personas" if r["units"] is not None else "Unknown personas"
|
|
# r["duration"] = parse_duration_inverse(r["duration"])
|
|
# ui.show_list(recipes, ["title", "difficulty", "units", "duration", "author"])
|
|
# ui.ask_text("Buscar por autor: ", search_author)
|
|
|
|
ui.callback = handle_action
|
|
root.mainloop()
|
|
|
|
# Start the application only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
|