From 808ff8896ee1cad125cb16bae048743e7f7ca2ab Mon Sep 17 00:00:00 2001 From: Jose Date: Wed, 18 Mar 2026 19:55:21 +0100 Subject: [PATCH] add: ej1 whoosh --- .../ej1/data/contacts/agenda.txt | 8 + .../ej1/data/emails/1.txt | 10 + .../ej1/data/emails/2.txt | 10 + .../ej1/data/emails/3.txt | 10 + .../ej1/data/emails/4.txt | 8 + .../ej1/data/emails/5.txt | 9 + .../ej1/data/emails/6.txt | 6 + .../ej1/index/EmailIndex_8a9o49zh7h1n22jh.seg | Bin 0 -> 27353 bytes .../ej1/index/EmailIndex_WRITELOCK | 0 .../ej1/index/_EmailIndex_1.toc | Bin 0 -> 2619 bytes exercises/information_retrieval/ej1/main.py | 193 ++++++++++++++++++ exercises/information_retrieval/ej4/main.py | 2 +- 12 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 exercises/information_retrieval/ej1/data/contacts/agenda.txt create mode 100644 exercises/information_retrieval/ej1/data/emails/1.txt create mode 100644 exercises/information_retrieval/ej1/data/emails/2.txt create mode 100644 exercises/information_retrieval/ej1/data/emails/3.txt create mode 100644 exercises/information_retrieval/ej1/data/emails/4.txt create mode 100644 exercises/information_retrieval/ej1/data/emails/5.txt create mode 100644 exercises/information_retrieval/ej1/data/emails/6.txt create mode 100644 exercises/information_retrieval/ej1/index/EmailIndex_8a9o49zh7h1n22jh.seg create mode 100755 exercises/information_retrieval/ej1/index/EmailIndex_WRITELOCK create mode 100644 exercises/information_retrieval/ej1/index/_EmailIndex_1.toc create mode 100644 exercises/information_retrieval/ej1/main.py diff --git a/exercises/information_retrieval/ej1/data/contacts/agenda.txt b/exercises/information_retrieval/ej1/data/contacts/agenda.txt new file mode 100644 index 0000000..3d48b4b --- /dev/null +++ b/exercises/information_retrieval/ej1/data/contacts/agenda.txt @@ -0,0 +1,8 @@ +unoarrobagmail.com +Antonio Garcia +dosarrobagmail.com +Pedro Guerra +tresarrobagmail.com +Ana Montero +cuatroarrobagmail.com +Luis Pontes \ No newline at end of file diff --git a/exercises/information_retrieval/ej1/data/emails/1.txt b/exercises/information_retrieval/ej1/data/emails/1.txt new file mode 100644 index 0000000..f636f4d --- /dev/null +++ b/exercises/information_retrieval/ej1/data/emails/1.txt @@ -0,0 +1,10 @@ +unoarrobagmail.com +dosarrobagmail.com tresarrobagmail.com +20101015 +Contrato de compraventa con la constructora +Estimados socios: + +ya hemos firmado el contrato de compraventa con el cliente preferencial. +Espero noticias vuestras. + +Un saludo, \ No newline at end of file diff --git a/exercises/information_retrieval/ej1/data/emails/2.txt b/exercises/information_retrieval/ej1/data/emails/2.txt new file mode 100644 index 0000000..e61b4dd --- /dev/null +++ b/exercises/information_retrieval/ej1/data/emails/2.txt @@ -0,0 +1,10 @@ +dosarrobagmail.com +unoarrobagmail.com +20100410 +Retraso en la firma del Contrato +Estimados Antonio: + +agradezco mucho tus buenas noticias, aunque me temo que el documento que debe adjuntarse al contrato se va a retrasar +unos dias. + +Un saludo, \ No newline at end of file diff --git a/exercises/information_retrieval/ej1/data/emails/3.txt b/exercises/information_retrieval/ej1/data/emails/3.txt new file mode 100644 index 0000000..5eb28d1 --- /dev/null +++ b/exercises/information_retrieval/ej1/data/emails/3.txt @@ -0,0 +1,10 @@ +tresarrobagmail.com +unoarrobagmail.com dosarrobagmail.com +20140225 +Transferencia realizada +Estimados socios: + +aunque el contrato no este legalizado aun, me he permitido hacer una transferencia por +la mitad del importe al contratista. + +Un saludo, \ No newline at end of file diff --git a/exercises/information_retrieval/ej1/data/emails/4.txt b/exercises/information_retrieval/ej1/data/emails/4.txt new file mode 100644 index 0000000..7a52648 --- /dev/null +++ b/exercises/information_retrieval/ej1/data/emails/4.txt @@ -0,0 +1,8 @@ +unoarrobagmail.com +tresarrobagmail.com dosarrobagmail.com +20110114 +Lo comunicare al cliente +Estimados socios: + +muchas gracias por las gestiones. se lo comunicare al cliente hoy mismo. +Un saludo, \ No newline at end of file diff --git a/exercises/information_retrieval/ej1/data/emails/5.txt b/exercises/information_retrieval/ej1/data/emails/5.txt new file mode 100644 index 0000000..27c82c4 --- /dev/null +++ b/exercises/information_retrieval/ej1/data/emails/5.txt @@ -0,0 +1,9 @@ +unoarrobagmail.com +cuatroarrobagmail.com +20130912 +Contrato y Transferencia +Estimado Luis: + +ya hemos realizado una transferencia a su cuenta por el importe establecido inicialmente. + +Un saludo, \ No newline at end of file diff --git a/exercises/information_retrieval/ej1/data/emails/6.txt b/exercises/information_retrieval/ej1/data/emails/6.txt new file mode 100644 index 0000000..d3cc028 --- /dev/null +++ b/exercises/information_retrieval/ej1/data/emails/6.txt @@ -0,0 +1,6 @@ +cuatroarrobagmail.com +unoarrobagmail.com +20131105 +Gracias + +Un saludo, \ No newline at end of file diff --git a/exercises/information_retrieval/ej1/index/EmailIndex_8a9o49zh7h1n22jh.seg b/exercises/information_retrieval/ej1/index/EmailIndex_8a9o49zh7h1n22jh.seg new file mode 100644 index 0000000000000000000000000000000000000000..143dd6283390dc214defac509680ce37427d0837 GIT binary patch literal 27353 zcmeHP3tWuZ`=5LFW@@_MlO&f`CDvk9iikwn+>)u8G&D_WMhPpCYY64%n%r{ES5w%N}ZyTq@~MP-Lf^0*j82~0(rKkjJU;t4-;@f+Q>AC zRF;*Xte*uL@Zup>GAl3;Ko*N@jw^wWVLAw|1aKLF3o1!bDO2S2t6;kWS*U`KIO-I7 zlVJTw2Rj>JC~pWZs6(AHMlE9nf*eR3itQxj7I-277ox#V?u|l9<*<}t$vL_m=O2y%s_0WF55h=jaGlG0g$+2D!B4cJ(N>OeArV`U@+-~z#t zC{LqkV)BDgI*^5aK$=EERmv2ZZYI!rfeWPI1(MMPaDyzQ3Cx=bcqA}!jC~Qu0DOYh zB}o!wY4rx1G1*5b3X$OfE@=V?bw)Rv9Fb#4!LvWytzu-Gs?B zGEL7Rab!y}HZZl-V_j(W8Ing6Y_e>mBw3z?M<$aRv$=Y$Vi+#CmM2IY4VPrKoFTF> ztIhjR>sRQO7j1Lk{Dnf^d^5msc|m=ykf$r^Z;=q%3*D+^s3cNtuWNO{`jHOdXv9FX zi$a}{)PQq^-|Hy>6kW5lO0bDotK>p$W{1^r)?Rg%AyxwK!>5g0wJ$m<+S@9*ySj0ElL2Ef(r$FYMLxjmL^Mq zQLptg(^OCa0G5FZbtu@127(+&9Ie-&2Ix7E z#BlP0eM4$cCdlFJ6$pxekd^{-utt!q`hy#EG^s(YbuW|QR=a^HG#m{@6__5i1Z~G- zMq67U0-V7-SS;YDlytHy>BhiTAlt3sq5_S9dB{}j20f#uOF#}b4Os$qyY;!&racga zu0_?WQzZ2^$7TtTh0Q^ht5bky(elaoUSRVE2xILT%P{p~jqHt0>s|ZfbQa-68Vf2x$^H!hdb$WF z(pZ=e9sKAl!ih8%=0gWLokcj2#=?B)P)uhLPNcCgA39v2vP3N8M7rlOA3DOd0P}{1 z1)WG|0YNf3h61F|f=;BfFdsU?w=8`YbRwOF`OtAE154vXnl~^XIzD1xX`D!7VLo(n zr?TMC)(mHQ3o#!$4Ps&eC(>C!kW5bV0K&XM=mJipvoIezon&GGC(>D%51ru4Ax#%> zBAtc#&>7C=G!}3oorU?(87}@9%MD~TPNcIiA3CpQWDQtGXTkbVa=r=xUDx2FbQb1A z7ZBtOto0sr7Un~jo{X$bx)n$}$6-En`I?cn(}%7L^Pvme5ny(;1X+y}=`75LE{~a6 z1L!PGeZS0q_Z#|y1u8f+JP)c z*>|)%7;S&Sa@b0c+&X|;XK#H$O(*GHAfFss3eP;%P>0Q4VX z_%SoE-gUc6W%x5QFb}xH!xv0Pdu9gKtL|{)iZKG18CZ|H&!93oFf*{;bU%a`w&2r) z#Z6F4RB1{{TADImGEyOtC;NaCMS9QTI$r^19Jt^%7}OwDg0}=JJunbF(rkdxbPsdT zH6S$`2yRkvS%VAERcSIhA3OpHG#leXM}gD~?hhs4Lh8Y$wL$f;ACY@uU z+^0gGDY!t>7>>4?)U}w2XfQ@KG~LlQqnejUo_G%jYEr;ki8?{0Ow%nUOhhjjb+CQJ z>kK5qGjZM7HU~Efwgx1xv)~3>i2RKmR5R&l2{-Ui2Qmgfch<^d(bV!Nl1J6c7;MMH zy?u~4G4Js;C!ypZV>2o6UWynf0Q65ZwK0Z`kXG=05*mRT1y1M!K@KF2Rt3z#hJn;7 z4cy@7loZ#!elzuVtEWH|)=sPueDZih*LfTc3NBb7yg+IVS4_x4b`ZxKI!)S!Vcu#D zcYBaSyy)|pU-T*3P_MUv?<*4?#UvpU?V)yUvRwQMl^ejl5M$~#_zIfvY;X6U{RmlM z=gKGy3WGBGLa&1)HXHe)2=*^X)-l8<{r=YHydhC)Kd)1lHl|481}^!>*&=8;NCp|i zC;dLW-(6&b>ETY&#g>Ew8y<8prj8dkiirniL&ry1A}oY`sZEK;OHjFExVBIa_eRw&J zIW8-a40kMEezW8)+B|V@eJckKBr>+H(LJNl__3O<70?4VZR>^-MZ6OH z(WGx;;T{PjhknE-ad#?uJAyza@pyTk?=9f21SE453q%gTcQSY_CGw`T-Fj|7*hr4c zoVrpdLozivChbNN*a0rBjLCJy4a9etf(uyx>XAa^g+YQ_&y@rgQEZvgZb&LoZcb?f zUJs75OxB}$M7*s$u?Au5CoQLEW)gCSqi}f~r#>&uBJ#$cM|vHG%LPaV>xoa|9)HYl zB7xX}csY*G)?J0~gdmA>H=R^XBe2bF;*)+q7*>a5#AqB4GT#@LqL{F}g0uLcW8j_~ zBw=Y@$@M$}^STkA^n2xbZ+Q9!62B+$N!BJFWQfn8-)EhpU88n zdbE4Bh`xD1Z%q#M={c|Ph}{3(ICg-6#CNIWjTQ+I?l z-^J2}K}pcQDkQ_PIrjM^gasjA-+Q)!AebRv61lm`FHpH%4r7I&Fg)1+&57I|xJ;#2 z>&-;oxDqcP8a-z_kr&ygO&yD{tpu0pWIv*e$eXst%l`^HepR%Cc*ZlGFUmmJs%~LO z^)8~ELungc&!T`RBxA8~?(J54iD$;exD1ZtcLbpL z^rG1av(0|#(ee~gZn)yFxY3gNXNhNa5f_)|R}k1D7MK72KZdA}HDAGLA2}_|JdZMF z+Cn`rd-4w#i6Vy`%MT2KCu<hDk!NpE zc#S}q^cgNm#}(V}R}-Z|N<1XW<50Q1d(OS5PYD@A^5965U3R*%29>`fKI!)fVXjDq zh1MBDMsZ|(9f7PV^X;)O_%DcZlQT>6MZXc)h`M&4z4sc)aCqjo5hnL6#FM|e@7%rZ z4e^XmnX6Mx#BYgmv+r=O`9lS=jf>_g;%QONEmUrX_OXt+I-Umeh`i%&tQ=wvDu;1T z;`^8k#AG>a6T(i`7;dYRcd8GQ+)zwjACoe0%#m1)$wN$*&jA?CIZl_b@+SZzd!3zi z<<0>Bqf{?O^*i?^rCF^1dQ7%ihP7EAlQI)AnZ{t!x22#Svd$A&oe+~UFR^lcjJPgl zIvE-!WjryN#$d9Zu2?+~ljQ?3tiWVGCPuNU`Q`zs&|u^%m;0DreN49Vt*)J});f6& zMl!B#F_}wFCb@mFdSn=$h}HR_7}@Aniq(Gts~=*rJvXp&h{^JoI_z$SVNadB`^Utn z&F%v+T?!26VtVVtsQvCMfIL~}S&R!YDRUnyhnOsXiJh zSh+qXWnzI0va8t~jJpowLQKk+WB3xlhJ>6)0PzX$G>q!^kYO?qqdGO;TcAf8OqLg7 z@_!eTx=vvA-vAhuH#gJOsljA600#h2h#;HDYFhM*T;xZ><#Q&92-s#&exosoGPw0w+DADcLVnZ&y?4Vm%%IH-QbJ)J^2&)W&FDW zOF@W0B`6WxF%TI98e|x(H@IRbF#Ozbl;H})O2c0#3vGf%UjX7kOyGka_9Z!R}qX?{s)A`B8{2up=`MfRe8 zqFm8_QLTlSMXbd_i_;eGEdwk^TduOaA~q9u7mpKf6W_OTwCZm)-KyNG&f3#D#(Jjp ze(NVTt~L^zVw>BxuC@}}V%ytxu67Z2Id=Q(YVFJ|ygRO@e9V}H*P;ElZ2Y-y{vV>fVOK9j_P;Q5%No_Ti!LwxFRxA> zmfUvE@S(pk%_nTMkrV+U_K*TQomzopCKjm>*>yY*<;CMTDrtE_}q zb34sDn0esdX|K(m{mzEu&8gdK9Ok5xXRR7Ju-*KM*h&7=PPnjU%4?y`F8S`S^4a)S>jp)o2l0q6|If+SU9z!d$j04 zbF8g`0wU4?3MvxX&!VY+Hw-+Nz&^ig&;1#|?`cFlnk+qHMM2 zLhIbgEsGvGkMn)=Zh6a~P6tZA;b*n+Szh;e%7gIX2X+p5K6m)3+Z8W&8CTEDcp3d7 zbZC)r&BJe>g-pBd;d%4Qcg@x{UvJfU*m%{%(W@U!@$C||{g*GCVs})$-|ErTaoFzh zZc)Dvno@HA_m_rN#Xa|?pI)u4CJbwi~`joz*wZJpBHt<4<$f3_ILI@rs>b zYUa{erRckA;SXaDj8yJc^S>F@GGW&HSyv*>Ph4NVctLWH->l|4y-Hj7Yuud6y-hQR ztMkj?unGCh{j{e!&(LgL>yXJ2!K+tqSTk$tJ)gA4edDF$2Va{WHaUdb?Z|^?36Iwm z&zQPw$;ir4_uf_?ic*&f1`aGe_q1fg$#KH*tvJ~oARQi*|ZK{tPzJjSyyWw&;Itr3R540gt6AX|b}oP5Xi3uLpG& z{!qd_SLWb%E8u?VxtR_-`;9nU9=v+VzW6pqJ0gr?vc$XU%-)AQH#Xl|t%_UhKBTBi zRQj|IzepYpyA-=Aw(At{_A&qIGw*%!bf2HAY=6(lj061)hDdi`QTgRBqi)5{*nic~ z=X?H`2RB_?8ec7x`zbtrYq6qvOtk2Fbw9&*dEKgZ1qYq2TJGP;f8^pTmrZPK_r+SB zX!DcT&GW9o$^l~oe$IE}-wodPm9b-`8GGP}oJxyymTUHa79~lO3&-1Zn=#rn;K+gz zp=D#Q_8K#DTC?^pL9Zt)jcTrRI@&5E(mLm-b2)|Qw=P=R=eS~4xPAD#r(K$Fad13& z{pU8LeZ@6P15RyPyJzaP#8Ek(3k%j7_P>6<`@`yq6Zcf`?vIJd;I}$c5tml8;*Hql zK-tn~k-~ckPg^a`+A{MuRnHd#m$>@Z{Cewvj>RS?2 z@oK~I>B|m;-0XdKzRS#0^WR;at5zyTC;le&TUi~||C>V*HV+?MIJIW|pMuf-W*-G! z%P;TUJxG+h&DZyIi+7>CAH3V%%gLQn`qN;3?vta}%f+!*_y78Mo?X!P^WQv^Pw@5f z4oO{B=UjE>al4xGj$0$thc<0$v8(mX3!%@xkX@5F-d_KcYU4%YJCpb2yIFqmX5Ut; zdHEi%cdZS(Ifyqk`moh=`#attZU%eAr+5W^9p>g0_V~4QZLC+PO@;gF^3N4^SUyI% zv&ZZunb)?2E^>Qma=oL8c!*!G)6+S#46>RzAFJ6`8y)RZQ|)Xq{p#y#Yq29eHwVe{KJblUO%@$T-Dr#qP|y zz_wv`V0(mtpEQ9K;~$*-{{8!n*_@W)>~MC^ZqJBaML0LyD%?snhUJqveDsLtiOFrH z&%)>StLWFOuTNF)vs~Z3#`YeA><(v_<$LFtg*|A#NLpoYF9g553Kkr?_xrs+3ohP^-+Hl&Xv*e%-<;`eQ#;9CXu+w9 zUTsdF@oytdX)QHsB~^ST{gKOx8X^4l(^!D~PqBdFe|aPP|6XW|TE780h(mP{rTQ0% zz;1d6e-1j>VyelKWwclLV{**>ke^O{et-gs{-&Dc5mSOaY@X`A$ z5rpSw4B?px!*iM@JRdJVzTixkW5HE>f)^DBUw#$bp(`tDi2m^97@n^Z+jvXQrDlxCW@Wq) z?zQvG%{yIt`Va4or%w#qIL=KF;~q7Y71Bv8@;{pDp3Za3U$Jn|f+Zsdox1SVDTnA& zmxrI)YQVa^(Yo@J22>Ldhb9iFrfU5vqQ@z%r}y}C*yHpSlLx0Hdt1NI?d(4~&GcL{ zZd&dcMQ)p*+$~RXle1X=c_4E6q+Q&^OHLD4Pw4~7hULTdLruKhT2E}Z^a2#yO*~6f z{LSrF`cQY>ek(yJrv^_`xu4kp>~)|vF}X1YjMg_m&qW`5L2lysX)5RM|D#(`G_O%wulK6an0d`kDU?ws=BDah^sb~gwy=r$71)d>H|AYw zgK!_!gr<6%)Qn$v)P!xQDN8#zYJwZ-Y}#oJzuB?qQ&vX=Ytt>m{pw6jmRvI)oMSxr zn#t7{toK8O5!CkU`RHRWEtDA2Z$Eus__vSB*1)}k0K9KtCvhCz+py++W9pFw45wMa zIp8#_>`w4{!w$un=(GmkJ~-VF9YQWb<3MkU!@}rA;;2Tf?{>dx4^)_5>ak7qbMV&m zs&Roj{1d;_H&sV}vOQK)8Sp71l`PCMiy~t(R+i+)~84S@Vc2j8Zz?P($a;H;il!=c*g9ouBtrd3`l}3p| z3Jo3~lC&?mc~lw)pUWw>(&L`0`6?YtNlcrBMQzLi^W~@bHe}l7Br( zbIHFkNps1+qnqZEf9FY>OaA|@IA;+GBBF9Vo5O;c?oH!9hEO2_W2hr37d93z{1l{NRT+@Ak! zX})Jk{_frRrRDjtLRLk&*bt37q>C~ND7qMb=<`o>Wrz)^x+p`3qKolMoPVIJvX|Hx z>6!|C$fX{eP;|lbhEYrhV-4#I;BM}{$F#Wp_(*qyvGR!g_A!y}8ROmijd$PX!ZP0F zM!u6E-%`drFPx|AN5)@y;QFKL@5jm@u?f|;AD}C}0bL`+rc_;&cP&A;m4fwEPuGr|K`EYdoc2%6w#>OOuvB1Im3;tq3-q^9w z-LrM1=TJf9(yk@>+$aaB$SZ7V>yrHa<@p=R^J9c8lWlHuKAO=DbY?}Q{$xX$*eF5T z;KUZ*fP?F_4rZD}|Cz3a>{_HhAU?pZhJy5>{($%-3sD{nQV#VOrxYJPWB!U6pH`TA zEP&hHQ>+cqHKqAG%k#s9tVuiVzoW+G2TU>zEVz|i{ee$e1Ucax-Q?NeBG^EzR3?4I zxNN|#3f8bU=Rxnk?C$m_MONTH%bN=XY#xUL{sd|roA^x@ z{G%t=Q1~w|d%-_iiu+vBN!h+rR#L|#-xNQ;QAs|kG==6bqXf#t#B>?>11Hlw0r*p< zk*cJEzUu<=3>8YL9Q+BDw4i6u&ne}tuI~0z=XupvVO%=+AA>R}_z#=Ou;L_c-?D%t zUf-R9BtEKQ)GT#g>-wwE{NYs`Xa`n}v`Z2%TvtnSdv*PmrKYP;14aR6->q%54S+Uy r*V_R2XJUHl?c@~oqchZbZR)K9{mGbKH7?X@l55mzpKSib0-yf_<>ELl literal 0 HcmV?d00001 diff --git a/exercises/information_retrieval/ej1/index/EmailIndex_WRITELOCK b/exercises/information_retrieval/ej1/index/EmailIndex_WRITELOCK new file mode 100755 index 0000000..e69de29 diff --git a/exercises/information_retrieval/ej1/index/_EmailIndex_1.toc b/exercises/information_retrieval/ej1/index/_EmailIndex_1.toc new file mode 100644 index 0000000000000000000000000000000000000000..8b124d6e5c4d2b55dd5698e0841347acc719dd2c GIT binary patch literal 2619 zcmb_eXM5B}5I%QX^qNXyx{SdT)0-jK5a+1`S}2CSk5)66<2Dmz9=WE~jQ*Y>2T@RvO8oduA1z=6yWWrwmEhOrN&I z*eWZnv`}+3yLyB9*fzIVSOP2PnsWuxDbJ~Rffcfowbc?zgt92c#Yz)_kjDvdNfDRI z3s6GuI&&?G5J&}9X5&M1a|xFb#_||f$cjWDY`o&LAh0|aR~kN(RD+^gHj{9bvY7lM zW1aFyR}cMlY{S+0_S?9ofNO=?vQ`Q3k?BZZ5WQsCArF1l8)Lqq1?;piNcbqle(YbD z4~AZ4Zsp^&@`)Iql!erQJOQpVR8@}%%5qgzD*4_G<_0X3@Tnp`Ei3hWPXcTg1I@+t zN&{I88g0mYA#N0=su$rVA%QR)+$@ZtZsHcP&MV=35V!)@7eVAv^4 zT7cU#SQg?ArH*21*d^e{tPpT#2I}Lk%m&QG-NL3`pW8dV9OrE~3%@i|#p#6RHmrRr2`A?DBY{6!}EiR$tyAthyiXJb4U>0-6v*^%R_ zVYR|=Afpjfm~C5b8P88E%jna&xk_1Gvc~a(;3cdoUR0LT5+|Yr67``HDZh#Z5zk~L zjXXFgw>0V2lh-qb>d!}nEZO{4eZP~TAhxjqRM z(tIh#7iDDyr|9`1hZZ^v&t!O6B+W$}5kzQTg~H-P*=~DoS;8wKoNk2ys`(R8qZ7Fn zDZ#7L%2G~-8J45jYQ@$J7NC~Ti9!#+DurqgvI^??)=ZRDc1e^m1q<8I0}Z6V_kr$V zk`@LFPPlK}B52p3zEPqT# z^52i~11|r=O#VkPemqb9Cqn+GF@9E)|M~wV|BC{Cxj;T}kzd95^?Z@v{D;U$l&MNG zYwwD0WBg8rrXqe1O&Sar@dp^zz$xO7;AjvP@h6Bhm@MMYFsZ>z5r2UhEfPtmnpJo1 zdGc$FzeRc%?I6FJauZGWPKiM-S4@-DwTBMxLq=CN=}wcj;qF0Jc9NNy^D;{lsn5|YyPq4trIv+heoYkVTG!vR8eb;> literal 0 HcmV?d00001 diff --git a/exercises/information_retrieval/ej1/main.py b/exercises/information_retrieval/ej1/main.py new file mode 100644 index 0000000..fcd8798 --- /dev/null +++ b/exercises/information_retrieval/ej1/main.py @@ -0,0 +1,193 @@ +import locale +import re +import urllib.request +from datetime import datetime +from pathlib import Path +import tkinter as tk +from tkinter import messagebox, ttk +from tkinter import Tk +from tkinter.scrolledtext import ScrolledText +import shutil, re, os + +from whoosh.index import create_in,open_dir +from whoosh.fields import Schema, TEXT, DATETIME, KEYWORD, ID, NUMERIC +from whoosh.qparser import QueryParser +from whoosh import index, qparser, query + +DATA_DIR = Path(__file__).parent / "data" +CONTACTS_DIR = DATA_DIR / "contacts" +EMAILS_DIR = DATA_DIR / "emails" +INDEX_DIR = Path(__file__).parent / "index" +CONTACTS = {} + +def create_index(): + if not os.path.exists(INDEX_DIR): + os.mkdir(INDEX_DIR) + + if not index.exists_in(INDEX_DIR, indexname="EmailIndex"): + schema = Schema(sender=TEXT(stored=True), + receiver=KEYWORD(stored=True), + date=DATETIME(stored=True), + subject=TEXT(stored=True), + body=TEXT(stored=True,phrase=False), + file_name=ID(stored=True)) + idx = create_in(INDEX_DIR, schema=schema, indexname="EmailIndex") + print(f"Created index: {idx.indexname}") + else: + print(f"An index already exists") + +def add_to_index(writer, path, file_name): + try: + f = open(path, "r") + sender = f.readline().strip() + receiver = f.readline().strip() + date_raw = f.readline().strip() + date = datetime.strptime(date_raw, '%Y%m%d') + subject = f.readline().strip() + body = f.read() + f.close() + + writer.add_document( + sender=sender, + receiver=receiver, + date=date, + subject=subject, + body=body, + file_name=file_name + ) + except: + messagebox.showerror(f"[ERR] adding {path}/{file_name}") + +def index_emails(delete = False): + if delete: + shutil.rmtree(INDEX_DIR) + os.mkdir(INDEX_DIR) + create_index() + + idx = index.open_dir(INDEX_DIR, "EmailIndex") + writer = idx.writer() + count = 0 + for f in os.listdir(EMAILS_DIR): + if not os.path.isdir(EMAILS_DIR / f): + add_to_index(writer, EMAILS_DIR / f, f) + count += 1 + + writer.commit() + return count + +def create_contacts(): + try: + f = open(CONTACTS_DIR / "agenda.txt", "r") + email = f.readline() + while email: + name = f.readline() + CONTACTS[email.strip()] = name.strip() + email = f.readline() + except: + messagebox.showerror(f"[ERR] creating contacts list") + +def load(delete = False): + create_contacts() + return index_emails(delete) + +class EmailsUI(): + def __init__(self, root, title = "AII"): + self.root = root + self.root.title(title) + self.root.geometry("900x600") + + # Menu Principal + self.menu = tk.Menu(self.root) + self.root.config(menu=self.menu) + + # Menu Datos + datos_menu = tk.Menu(self.menu, tearoff=0) + datos_menu.add_command(label="Cargar", command=lambda: self.callback("load")) + datos_menu.add_command(label="Listar", command=lambda: self.callback("list")) + datos_menu.add_separator() + datos_menu.add_command(label="Salir", command=self.root.quit) + self.menu.add_cascade(label="Datos", menu=datos_menu) + + # Menu Buscar + buscar_menu = tk.Menu(self.menu, tearoff=0) + buscar_menu.add_command(label="Cuerpo o Asunto", command=lambda: self.callback("search_body_or_subject")) + buscar_menu.add_command(label="Fecha", command=lambda: self.callback("search_date")) + buscar_menu.add_command(label="Spam", command=lambda: self.callback("search_spam")) + self.menu.add_cascade(label="Buscar", menu=buscar_menu) + + # Callback externo desde el punto de entrada + self.callback = None + + def show_list(self, items, fields, title="Listado"): + mw = tk.Toplevel(self.root) + mw.title(title) + listbox = tk.Listbox(mw, width=80, height=20) + listbox.pack(side="left", fill="both", expand=True) + scrollbar = tk.Scrollbar(mw) + scrollbar.pack(side="right", fill="y") + listbox.config(yscrollcommand=scrollbar.set) + scrollbar.config(command=listbox.yview) + + for item in items: + row = " | ".join(str(item.get(field, "Unknown")) for field in fields) + listbox.insert("end", row) + + def ask_text(self, label, callback): + mw = tk.Toplevel(self.root) + mw.title(label) + tk.Label(mw, text=label).pack(pady=5) + entry = ttk.Entry(mw) + entry.pack(pady=5) + ttk.Button(mw, text="Aceptar", command= + lambda: [callback(entry.get()), mw.destroy()]).pack(pady=10) + + def ask_spinbox(self, label, options, callback): + mw = tk.Toplevel(self.root) + mw.title(label) + tk.Label(mw, text=label).pack(pady=5) + spinbox = ttk.Spinbox(mw, values=options, state="readonly", width=40) + spinbox.pack(pady=5) + ttk.Button(mw, text="Aceptar", command= + lambda: [callback(spinbox.get()), mw.destroy()]).pack(pady=10) + + def ask_radiobutton(self, label, options, callback): + mw = tk.Toplevel(self.root) + mw.title(label) + tk.Label(mw, text=label).pack(pady=5) + sv = tk.StringVar(value=options[0]) + for option in options: + tk.Radiobutton(mw, text=option, variable=sv, value=option).pack(anchor="w") + ttk.Button(mw, text="Aceptar", command= + lambda: [callback(sv.get()), mw.destroy()]).pack(pady=10) + + def info(slef, message): + messagebox.showinfo("Información", message) + +def main(): + locale.setlocale(locale.LC_TIME, "es_ES.UTF-8") + + create_index() + root = Tk() + ui = EmailsUI(root) + + def handle_action(action): + match(action): + case "load": + resp = messagebox.askyesno(title="Cargar", message="Quieres cargar todos los datos de nuevo?") + if resp: + recipes_count = load(True) + ui.info(f"Se han indexado {recipes_count} emails") + case "list": + ix = open_dir(INDEX_DIR, "EmailIndex") + with ix.searcher() as searcher: + emails = searcher.search(query.Every(), limit=None) + print(emails) + ui.show_list(emails, ["sender", "receiver", "name", "subject", "body"]) + # buscar con queries y tal... + + ui.callback = handle_action + root.mainloop() + +if __name__ == "__main__": + main() + \ No newline at end of file diff --git a/exercises/information_retrieval/ej4/main.py b/exercises/information_retrieval/ej4/main.py index 59e73a9..7078603 100644 --- a/exercises/information_retrieval/ej4/main.py +++ b/exercises/information_retrieval/ej4/main.py @@ -17,7 +17,7 @@ from whoosh import index, qparser, query BASE_URL = "https://recetas.elperiodico.com" RECIPES_URL = BASE_URL + "/Recetas-de-Aperitivos-tapas-listado_receta-1_1.html" -DATA_DIR = Path(__file__).parent.parent / "index" +DATA_DIR = Path(__file__).parent / "index" def init_ssl(): import os, ssl