diff --git a/exercises/information_retrieval/ej1/data/contacts/agenda.txt b/exercises/information_retrieval/ej1/data/contacts/agenda.txt
new file mode 100644
index 0000000..3d48b4b
--- /dev/null
+++ b/exercises/information_retrieval/ej1/data/contacts/agenda.txt
@@ -0,0 +1,8 @@
+unoarrobagmail.com
+Antonio Garcia
+dosarrobagmail.com
+Pedro Guerra
+tresarrobagmail.com
+Ana Montero
+cuatroarrobagmail.com
+Luis Pontes
\ No newline at end of file
diff --git a/exercises/information_retrieval/ej1/data/emails/1.txt b/exercises/information_retrieval/ej1/data/emails/1.txt
new file mode 100644
index 0000000..f636f4d
--- /dev/null
+++ b/exercises/information_retrieval/ej1/data/emails/1.txt
@@ -0,0 +1,10 @@
+unoarrobagmail.com
+dosarrobagmail.com tresarrobagmail.com
+20101015
+Contrato de compraventa con la constructora
+Estimados socios:
+
+ya hemos firmado el contrato de compraventa con el cliente preferencial.
+Espero noticias vuestras.
+
+Un saludo,
\ No newline at end of file
diff --git a/exercises/information_retrieval/ej1/data/emails/2.txt b/exercises/information_retrieval/ej1/data/emails/2.txt
new file mode 100644
index 0000000..e61b4dd
--- /dev/null
+++ b/exercises/information_retrieval/ej1/data/emails/2.txt
@@ -0,0 +1,10 @@
+dosarrobagmail.com
+unoarrobagmail.com
+20100410
+Retraso en la firma del Contrato
+Estimados Antonio:
+
+agradezco mucho tus buenas noticias, aunque me temo que el documento que debe adjuntarse al contrato se va a retrasar
+unos dias.
+
+Un saludo,
\ No newline at end of file
diff --git a/exercises/information_retrieval/ej1/data/emails/3.txt b/exercises/information_retrieval/ej1/data/emails/3.txt
new file mode 100644
index 0000000..5eb28d1
--- /dev/null
+++ b/exercises/information_retrieval/ej1/data/emails/3.txt
@@ -0,0 +1,10 @@
+tresarrobagmail.com
+unoarrobagmail.com dosarrobagmail.com
+20140225
+Transferencia realizada
+Estimados socios:
+
+aunque el contrato no este legalizado aun, me he permitido hacer una transferencia por
+la mitad del importe al contratista.
+
+Un saludo,
\ No newline at end of file
diff --git a/exercises/information_retrieval/ej1/data/emails/4.txt b/exercises/information_retrieval/ej1/data/emails/4.txt
new file mode 100644
index 0000000..7a52648
--- /dev/null
+++ b/exercises/information_retrieval/ej1/data/emails/4.txt
@@ -0,0 +1,8 @@
+unoarrobagmail.com
+tresarrobagmail.com dosarrobagmail.com
+20110114
+Lo comunicare al cliente
+Estimados socios:
+
+muchas gracias por las gestiones. se lo comunicare al cliente hoy mismo.
+Un saludo,
\ No newline at end of file
diff --git a/exercises/information_retrieval/ej1/data/emails/5.txt b/exercises/information_retrieval/ej1/data/emails/5.txt
new file mode 100644
index 0000000..27c82c4
--- /dev/null
+++ b/exercises/information_retrieval/ej1/data/emails/5.txt
@@ -0,0 +1,9 @@
+unoarrobagmail.com
+cuatroarrobagmail.com
+20130912
+Contrato y Transferencia
+Estimado Luis:
+
+ya hemos realizado una transferencia a su cuenta por el importe establecido inicialmente.
+
+Un saludo,
\ No newline at end of file
diff --git a/exercises/information_retrieval/ej1/data/emails/6.txt b/exercises/information_retrieval/ej1/data/emails/6.txt
new file mode 100644
index 0000000..d3cc028
--- /dev/null
+++ b/exercises/information_retrieval/ej1/data/emails/6.txt
@@ -0,0 +1,6 @@
+cuatroarrobagmail.com
+unoarrobagmail.com
+20131105
+Gracias
+
+Un saludo,
\ No newline at end of file
diff --git a/exercises/information_retrieval/ej1/index/EmailIndex_8a9o49zh7h1n22jh.seg b/exercises/information_retrieval/ej1/index/EmailIndex_8a9o49zh7h1n22jh.seg
new file mode 100644
index 0000000..143dd62
Binary files /dev/null and b/exercises/information_retrieval/ej1/index/EmailIndex_8a9o49zh7h1n22jh.seg differ
diff --git a/exercises/information_retrieval/ej1/index/EmailIndex_WRITELOCK b/exercises/information_retrieval/ej1/index/EmailIndex_WRITELOCK
new file mode 100755
index 0000000..e69de29
diff --git a/exercises/information_retrieval/ej1/index/_EmailIndex_1.toc b/exercises/information_retrieval/ej1/index/_EmailIndex_1.toc
new file mode 100644
index 0000000..8b124d6
Binary files /dev/null and b/exercises/information_retrieval/ej1/index/_EmailIndex_1.toc differ
diff --git a/exercises/information_retrieval/ej1/main.py b/exercises/information_retrieval/ej1/main.py
new file mode 100644
index 0000000..fcd8798
--- /dev/null
+++ b/exercises/information_retrieval/ej1/main.py
@@ -0,0 +1,256 @@
+import locale
+import re
+import urllib.request
+from datetime import datetime
+from pathlib import Path
+import tkinter as tk
+from tkinter import messagebox, ttk
+from tkinter import Tk
+from tkinter.scrolledtext import ScrolledText
+import shutil, re, os
+
+from whoosh.index import create_in,open_dir
+from whoosh.fields import Schema, TEXT, DATETIME, KEYWORD, ID, NUMERIC
+from whoosh.qparser import QueryParser
+from whoosh import index, qparser, query
+
+DATA_DIR = Path(__file__).parent / "data"
+CONTACTS_DIR = DATA_DIR / "contacts"
+EMAILS_DIR = DATA_DIR / "emails"
+INDEX_DIR = Path(__file__).parent / "index"
+INDEX_NAME = "EmailIndex"
+# Address book filled by create_contacts(): email address -> contact name.
+CONTACTS = {}
+
+def create_index():
+    """Create the Whoosh email index in INDEX_DIR unless it already exists."""
+    # exist_ok avoids the check-then-create race of exists() + mkdir().
+    os.makedirs(INDEX_DIR, exist_ok=True)
+
+    if index.exists_in(INDEX_DIR, indexname=INDEX_NAME):
+        print("An index already exists")
+        return
+
+    schema = Schema(sender=TEXT(stored=True),
+                    receiver=KEYWORD(stored=True),
+                    date=DATETIME(stored=True),
+                    subject=TEXT(stored=True),
+                    body=TEXT(stored=True,phrase=False),
+                    file_name=ID(stored=True))
+    idx = create_in(INDEX_DIR, schema=schema, indexname=INDEX_NAME)
+    print(f"Created index: {idx.indexname}")
+
+def add_to_index(writer, path, file_name):
+    """Parse the email file at *path* and add it to the index via *writer*.
+
+    The first four lines hold sender, receivers, date (YYYYMMDD) and subject;
+    everything after them is the body.
+
+    Returns True when the document was added, False when reading, parsing or
+    indexing failed (an error dialog is shown in that case).
+    """
+    try:
+        # The context manager closes the file even when parsing fails.
+        with open(path, "r", encoding="utf-8") as f:
+            sender = f.readline().strip()
+            receiver = f.readline().strip()
+            date = datetime.strptime(f.readline().strip(), "%Y%m%d")
+            subject = f.readline().strip()
+            body = f.read()
+
+        writer.add_document(
+            sender=sender,
+            receiver=receiver,
+            date=date,
+            subject=subject,
+            body=body,
+            file_name=file_name
+        )
+    except Exception as err:
+        # showerror() takes (title, message); path already ends in file_name.
+        messagebox.showerror("Error", f"[ERR] adding {path}: {err}")
+        return False
+    return True
+
+def index_emails(delete = False):
+    """Index every file in EMAILS_DIR and return how many were added.
+
+    With delete=True the index directory is removed and recreated first;
+    otherwise the documents are appended to the existing index.
+    """
+    if delete and os.path.exists(INDEX_DIR):
+        shutil.rmtree(INDEX_DIR)
+    create_index()
+
+    idx = index.open_dir(INDEX_DIR, INDEX_NAME)
+    count = 0
+    # Used as a context manager, the writer commits on success and cancels
+    # (releasing the write lock) if an exception escapes the block.
+    with idx.writer() as writer:
+        for name in sorted(os.listdir(EMAILS_DIR)):
+            path = EMAILS_DIR / name
+            # Count only the files that were actually indexed.
+            if not os.path.isdir(path) and add_to_index(writer, path, name):
+                count += 1
+    return count
+
+def create_contacts():
+    """Fill CONTACTS from agenda.txt in CONTACTS_DIR.
+
+    The file alternates an email line with the matching name line.
+    """
+    try:
+        with open(CONTACTS_DIR / "agenda.txt", "r", encoding="utf-8") as f:
+            for email in f:
+                # The contact name sits on the line right after the email.
+                name = next(f, "")
+                CONTACTS[email.strip()] = name.strip()
+    except (OSError, ValueError) as err:
+        # showerror() takes (title, message).
+        messagebox.showerror("Error", f"[ERR] creating contacts list: {err}")
+
+def load(delete = False):
+    """Reload the contacts and index the emails; return the indexed count."""
+    create_contacts()
+    return index_emails(delete)
+
+class EmailsUI():
+    """Main Tk window: a menu bar plus helpers for listings and input dialogs.
+
+    Menu entries do no work themselves: each one forwards an action name to
+    the callable stored in ``self.callback``.
+    """
+
+    def __init__(self, root, title = "AII"):
+        self.root = root
+        self.root.title(title)
+        self.root.geometry("900x600")
+
+        # External handler, assigned by the entry point after construction.
+        self.callback = None
+
+        # Main menu
+        self.menu = tk.Menu(self.root)
+        self.root.config(menu=self.menu)
+
+        # "Datos" (data) menu
+        datos_menu = tk.Menu(self.menu, tearoff=0)
+        datos_menu.add_command(label="Cargar", command=lambda: self._dispatch("load"))
+        datos_menu.add_command(label="Listar", command=lambda: self._dispatch("list"))
+        datos_menu.add_separator()
+        datos_menu.add_command(label="Salir", command=self.root.quit)
+        self.menu.add_cascade(label="Datos", menu=datos_menu)
+
+        # "Buscar" (search) menu
+        buscar_menu = tk.Menu(self.menu, tearoff=0)
+        buscar_menu.add_command(label="Cuerpo o Asunto", command=lambda: self._dispatch("search_body_or_subject"))
+        buscar_menu.add_command(label="Fecha", command=lambda: self._dispatch("search_date"))
+        buscar_menu.add_command(label="Spam", command=lambda: self._dispatch("search_spam"))
+        self.menu.add_cascade(label="Buscar", menu=buscar_menu)
+
+    def _dispatch(self, action):
+        """Forward *action* to the registered callback, if there is one."""
+        if self.callback is not None:
+            self.callback(action)
+
+    @staticmethod
+    def _add_accept_button(window, get_value, callback):
+        """Add an "Aceptar" button that submits get_value() and closes window."""
+        def accept():
+            callback(get_value())
+            window.destroy()
+        ttk.Button(window, text="Aceptar", command=accept).pack(pady=10)
+
+    def show_list(self, items, fields, title="Listado"):
+        """Open a window with one row per item, joining *fields* with " | ".
+
+        Each item must provide ``get(field, default)``; a missing field is
+        shown as "Unknown".
+        """
+        mw = tk.Toplevel(self.root)
+        mw.title(title)
+        listbox = tk.Listbox(mw, width=80, height=20)
+        listbox.pack(side="left", fill="both", expand=True)
+        scrollbar = tk.Scrollbar(mw)
+        scrollbar.pack(side="right", fill="y")
+        listbox.config(yscrollcommand=scrollbar.set)
+        scrollbar.config(command=listbox.yview)
+
+        for item in items:
+            row = " | ".join(str(item.get(field, "Unknown")) for field in fields)
+            listbox.insert("end", row)
+
+    def ask_text(self, label, callback):
+        """Ask for free text and pass it to *callback* on accept."""
+        mw = tk.Toplevel(self.root)
+        mw.title(label)
+        tk.Label(mw, text=label).pack(pady=5)
+        entry = ttk.Entry(mw)
+        entry.pack(pady=5)
+        self._add_accept_button(mw, entry.get, callback)
+
+    def ask_spinbox(self, label, options, callback):
+        """Ask for one of *options* with a read-only spinbox."""
+        mw = tk.Toplevel(self.root)
+        mw.title(label)
+        tk.Label(mw, text=label).pack(pady=5)
+        spinbox = ttk.Spinbox(mw, values=options, state="readonly", width=40)
+        spinbox.pack(pady=5)
+        self._add_accept_button(mw, spinbox.get, callback)
+
+    def ask_radiobutton(self, label, options, callback):
+        """Ask for one of *options* with radio buttons; the first is preselected."""
+        mw = tk.Toplevel(self.root)
+        mw.title(label)
+        tk.Label(mw, text=label).pack(pady=5)
+        # Guard against an empty option list instead of raising IndexError.
+        sv = tk.StringVar(value=options[0] if options else "")
+        for option in options:
+            tk.Radiobutton(mw, text=option, variable=sv, value=option).pack(anchor="w")
+        self._add_accept_button(mw, sv.get, callback)
+
+    def info(self, message):
+        """Show *message* in an information dialog."""
+        messagebox.showinfo("Información", message)
+
+def main():
+    """Create the index if needed, build the UI and run the Tk main loop."""
+    try:
+        locale.setlocale(locale.LC_TIME, "es_ES.UTF-8")
+    except locale.Error:
+        # The Spanish locale may not be installed; keep the default one.
+        print("Locale es_ES.UTF-8 is not available; using the default locale")
+
+    create_index()
+    root = Tk()
+    ui = EmailsUI(root)
+
+    def handle_action(action):
+        match(action):
+            case "load":
+                resp = messagebox.askyesno(title="Cargar", message="Quieres cargar todos los datos de nuevo?")
+                if resp:
+                    emails_count = load(True)
+                    ui.info(f"Se han indexado {emails_count} emails")
+            case "list":
+                if not CONTACTS:
+                    # Listing may be requested before any "Cargar" in this run.
+                    create_contacts()
+                ix = index.open_dir(INDEX_DIR, INDEX_NAME)
+                with ix.searcher() as searcher:
+                    hits = searcher.search(query.Every(), limit=None)
+                    # Copy the stored fields while the searcher is open.
+                    # "name" is not an index field, so it is looked up in
+                    # the address book by sender address.
+                    emails = [
+                        {**hit.fields(), "name": CONTACTS.get(hit["sender"], "Unknown")}
+                        for hit in hits
+                    ]
+                ui.show_list(emails, ["sender", "receiver", "name", "subject", "body"])
+            # TODO: handle the search_* actions with Whoosh queries.
+
+    ui.callback = handle_action
+    root.mainloop()
+
+if __name__ == "__main__":
+    main()
diff --git a/exercises/information_retrieval/ej4/main.py b/exercises/information_retrieval/ej4/main.py
index 59e73a9..7078603 100644
--- a/exercises/information_retrieval/ej4/main.py
+++ b/exercises/information_retrieval/ej4/main.py
@@ -17,7 +17,7 @@ from whoosh import index, qparser, query
 BASE_URL = "https://recetas.elperiodico.com"
 RECIPES_URL = BASE_URL + "/Recetas-de-Aperitivos-tapas-listado_receta-1_1.html"
 
-DATA_DIR = Path(__file__).parent.parent / "index"
+DATA_DIR = Path(__file__).parent / "index"
 
 def init_ssl():
     import os, ssl