From fc4dc3d3a05d6e3c892f80ba7edb83bea626e4b9 Mon Sep 17 00:00:00 2001 From: Jeena Date: Tue, 15 Oct 2013 18:50:49 +0200 Subject: [PATCH] added report --- find_lines.py | 28 ++++--- report.html | Bin 0 -> 47986 bytes report.md | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 15 deletions(-) create mode 100644 report.html create mode 100644 report.md diff --git a/find_lines.py b/find_lines.py index 13c3a43..3168d70 100755 --- a/find_lines.py +++ b/find_lines.py @@ -13,11 +13,8 @@ def extractFeatures(label): img = cv2.imread(directory + fn, 0) - #temp = cv.CreateImage((100,100), cv.CV_8U, 1) - #cv.Smooth(img, temp) - + # find edges canny = cv2.Canny(img, 50, 100) - color_dst = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) # find colored black_pixels = numpy.count_nonzero(img) @@ -30,8 +27,11 @@ def extractFeatures(label): try: for line in lines[0]: x1, y1, x2, y2 = line - #cv2.line(color_dst, (x1, y1), (x2, y2), cv.RGB(255,0,0), 1, 8) - length = int(math.sqrt(math.pow((x1-x2), 2) + math.pow((y1-y2), 2))) + + # Pythagoras + a2 = math.pow((x1-x2), 2) + b2 = math.pow((y1-y2), 2) + length = int(math.sqrt(a2 + b2)) lengths.append(length) angle = int(math.degrees(math.atan((y1-y2) / (x1-x2)))) @@ -44,13 +44,10 @@ def extractFeatures(label): mid_length = sum(lengths) / lines_count mid_angle = sum(angles) / lines_count - features.append([[lines_count, mid_length, mid_angle, black_pixels], label]) - - #cv2.namedWindow("Original") - #cv2.imshow("Original", img) - - #cv2.namedWindow('Lines image ' + fn) - #cv2.imshow('Lines image ' + fn, color_dst) + features.append([ + [lines_count, mid_length, mid_angle, black_pixels], + label + ]) return features @@ -72,7 +69,8 @@ if __name__ == "__main__": tree = KDTree(features) for t in xrange(0, test_count * 2): - d, i = tree.query(test_features[t], k=2) + d, i = tree.query(test_features[t], k=3) + print "-" for j in xrange(0, len(i)): - print test_labels[t] + " is predicted to be a " + labels[i[j]] + 
" j: " + str(i[j]) + " d: " + str(d[j]) + print test_labels[t] + " is predicted to be a " + labels[i[j]] diff --git a/report.html b/report.html new file mode 100644 index 0000000000000000000000000000000000000000..c44e3db6ac249905dd1516e2e246cf3c53275ec3 GIT binary patch literal 47986 zcmeI5`BNRqm8kE}{a4`j^F;S(B=${idqyk*1OfyIkUWkaHUVOl*n}MKUw`j?Umj9f zRrj*A)NVgVIBwjfsxnWWJbRuzRrmk?pPyR4YfZEkTH~#i)|=L1YqHhV+H0M+j#`JU zKexVX{nU1})7ofl-&{Ly?bG(KwZrj8K2KXGe4o>+o6mAhIA@%>zG?d%*Z-Z7zH7|` z?TDUFIhx{or*!}p8$dbWjNii9O8vSjFuGdr(pX)smwaEOwx^8$UF#V~=U^=+Lfvb9 zruS#Qf2jO^!W?#5m#zP5?K1ubV;r`gFwO}_KLX`-YpFGN)BhZJr;Pm+th;INy!IGn z^GlEJHr%_R>Iv|=X?4i2O@29!RM-KtPCc6Qf81K8bN>Y;u>YRE(()tM&v?J1O&46>U^Y8k`M&iZyr=nn2xWG-YU>T! z%yV?XJ6g=~`xDo?z;A<*&Th_6(BeI3R=Hlkwvxt>wi`EN?1RNdqUAA|l-5eYSc12V zM2d^l-f#VpHf3ICocYYPr>)-B0KEB>t{-ytn6r;>=CjN9d1~{dHI!P&U%8=V{GM~= zCHzw`+U6ycU7(!P@-+4D`7%bcjdWK&oSWE`mYYD^LW*}2y)S^d4a_PASK#ODb`oxN z$WL4S6WJBdp31%RG^9|0a@k;H- z1Yev!LJf}}C+(kz-2pyJr-z zp3rL(SUZXC?Y*{u(sT2Y4yD&ISHh;*OBp?bi}FX!4jZGi*{d!|k7>1&biFQ9br~sb z9@ivq3td*mLT8j4wIgg;jHN9J9Sz>Z4AnF3aPTN=QcuFKj1|XC`N=asWiBBtRa~D! 
zpRQKl7mbugd+<)a>~TK6`)|GnZoUU^zK1&Ss-GF>mt)sQ>tno-7sst?c%9DTW$$98 zu9*EkS{PQyS*oA5dED~aCj63eVcWxEpE3FgFFj1yAtlXg`+RHR)JWw}ICbu1t?`PS zs`Ewvth@+GnYC4>T~&LexpG=0E>rd)t@>-4cpI#N2ele}P^+PPTK$cAcCy7!lVl$f z6>KM}sMFd;z0wXo^%M&1LM8i3@gY3Tf6!tT|Fl4Ka0S0-@m9M~`q|BE9*_7M&Q0>U zdvk8=%hhMCSHxM*@VL4CyVeLJjnX5ZnQE;A`*~{(-?xMZ+(e?^GSW8R&(Za_z!>B8 zmXSxn<}D+RwO-I}jqAr8uX1$A7;E$&L_&<64vDGq2->()T0Ljf9$HNz(=VCP5nlch zILq|ftHB>+=9}P^A;k9t6zGO7(~Q5zczsBg@;%P6c&+jKlz#6SWxqAd>w@+({2t@{ zTjC?nuj0p78Tn%p1`IL zFpp#UozriMd5O;ykmabyXW(k~y~1N4hWZUZl+nAa5smyn?>samjIide1@6EqF0U+tDPS*IfTZyA?(m3jWV{C%)ChFhbRbY2IG^BelV2mem^PJ;J(nydI7-tfU_brZbC()d%r8sfdi zIO4gEOvxvC*gnsYi>q7b8S-+G;4J|!WHeo((Lg@ilaXCy~?9;cr=Je~NVJsM5p z$fuzH3uuz<(p%=NEUQnUrxVEG1mmjbd*Gv8yXLpw@-_7I6?hl=@1jdnv=+YYK0`;p zuYfz5sH=WFgQ72?^UE7NmBY}x_uz5EsMTTFZ%xv}S0VEr0whM30`_+2ypIyfHk)>d5K$nEY8eY5-z zB|OzL`STq5tIv8)>7rg{yP)13LU;Yu2^@GuAKOa*d1POC+GSq)uQ$lWOU}Pe@}a$# zAM(-bJ;0viTYoV{-xa8@B^ZZq?f8X%e-D0ZaPN?L9MiuueoJ@aC;z_{zf0_+`ZR(b z=+CnMxXT`A`>lO2ZV9{mn!fU?3a{!{i{D)VYyH&({RiNmd@{QrpL(E~{JuzhkzZT9 zXNWHr@$s+Fqjfl#rdpux{$mR)+mwRGy7-OD6fND zgT!jqm9}e&yb5`Y>^@}e?%K^>=Qk@fk6cUJ+t|IVr%Kkxm618CkIbj!FI@dqA$jz* zkos$oJo;Kl12sq|GyR&2XwK z^JZ2@?-+SN=TfqKv9d3#-M%|89{K{~K8&-p--dN{_EoH(GMkc5C|LF1fmO+H#(I4q zwXM%GGMU0Fm=}GiW=YHS&b;`wXEyi?X4WY??#WlX@U%85ZT&`jq z-yFG$H^edKSd4KaSMizH)pc7x-`8BN<|qaKk97R6P zvE#pT74@7|PGv)vP%8=&21?6JSiIk<*(?b{i@uge*<<2Umw z;Fs5$c}@S@<_uiTnb&n~CcXO;J>Oti=UHo+1CE@5IT+Xb2j5(m z55S=TR;6-pmh0x^K5>4Mx9fXX;?qeEo-xuMJY9s7?$qRX%C?8-pPBPzBEDCw+Q)gh z2CuHZ9|PB@Ge@4)z3$i@t?rN4*9q+=_?yspt zaP0aR)j9>Qb{ebq|BJD@*l&ux=D+b>T@^Iv@XKfD{%PmB4p$~k+PjbX@Ye39KD@R2 zs-0VZ`>fq-?F?JG#$Nv2ZDODQ?lw^aaCe)i54gKc)DGO;CRVQBCJT`Xi?wUyvhr{9 zPx-eQr2N|)Q2uQe=QORg@4`Z@a-D12ClwyZ(`HqP>)DduD68qLonEtF$tZNrI20Il zYvT$!uZ-(BE9a$Nr(|~Ot?E@J&aAq!7qh1RmCBQ5b(KB4Zk0-P&uqc%EgbDW?94N? 
zW0mVh#NQV3;ru~#Ep+dk$@wF&N*n9AqL$D)2Xh?qWh><+>U7E&Rp(=ETks(~PvJ=& z@7whHq4N%}au<{;R!w_b$0fu1x+*rQSB2MJSr@)Gs)7nV+G%OFyj-ut%s6(ohEnaJ z^RW)-aG7~g$0 zvuT?^G+wlNU;&KXUvwX54NPYEwwlyxn>{|CfyFDXTleGM+-dqX2HYj=_9Sgqi4$jl zyb2ump0)1_P|98EIj$em{|%9^bkg3gg24nBUN9^7?94o_Lx~}9egyXCe2#+sF!Wv^ zUfx2g-9L-pBg{waCc(?B-87PGRjhnl0QM|+x|?V1&NPwoC`ZfSzd~#4D%_`f4)*U8 z6gm6~_-9-jXLWi7CeJzd3jsd$5TG~oXuE5>u2v=EWwB4hX zJF-KJB1|hNjIWL97kKZ}&Yk!VO7;w`<*|FMCt!HZIODwM7}G4R z6=L$*eaCU$?gv`$Fq_sKjy(VlR~);exJ&yZaCIeVrOGaFdJ-PfNY(XaGv^X1!X)y<;?&n$))(_N6+PPmXEu7mCGyeo$M?hL=;2 z-3>HrL!^-Vr>@h7q1!M$Pq{t_ymcTbpA+5N;RG6vgJTbn)MhP~^?VDQUu5JF zW;Vdcapyumxb}U4*Nsk}qYvtgd)Dp}s~I1Yl&Fc_VE+lYR%!Kd{tzB4@OuP&C&A=> z;_@itZ}GdEvpbxTQr1Y!CLH(S$sA|ZtT7DiP5 z$G39BT*nR&tV$l?a~CYcXNzB2fj-`QP~n8*CEA}r*Ed`_L-wqR9E5HQKpBUtdyJs9 zkP@@Nu>QjuL@Q^O!ARPLC42_9+Mx|7{2WRy^Lvr2a|y%OKr~Oa1&l>nT01%d9CsqD zSGR8SBQu`nIO=52fHuk4z0C3jpW0JB)O4bYw(|oxYN@Rn)WY=w^BKK9(EcN5miVQ1 zzE9`RIhsX6dcgb=I8kkAwT7A~b_2jzVtj4R5xuPKvO2|DNGp8T5=FG-`pQqdtjrut z7NVEW6K2s1oKNY~SmF(5r#Pz(e8YPLSXys=x^iFzjJ9`(nQEw@NhZjI6jBfQ|)>N_KwbB;EktoeNT-bXttbYB6% z8WufldIC&p0=oONHy zc+2rVaQ!u-=!JX1ZUqaVR?kAiceFg<*ITa5g3S_=6L+&IU;Fgk2kr;jtwr_EwogBN3ey)h8s{_|7b*I6JbN&PHjF-pl6!xO!c` zJ5SGHMtu#AT9Nm(m;jz5KH}&SOsvCqC2`8Jb=%hVJI4*qnyuaEyqdQFwLfvjI?Wm8 zWX%7Pqjk=zS6W(Y?#%!XG2>6*WAxGsgf+03V+O+VkJ_7zq zqNVrxec-r{>vR0}z7p>Z7)GWp8EKf&SJ9SZ=rInqb4liu;Un4&fqOq!^yk*r8DDP# z&-l;y!aDCq$f=G1Kkr7A7jb`_cJv*`n|#WpQRe;*NXA#2V50pUVJs!s zsyuhgjMc3YcE{umTy%fRO1MQv+<*#uw7H<=H2CYGrZ_*zxN4)aG0o8iv|9wW_cQAA zFPNp0V;1fmc+A7yUEYh#*R}N~wDb^qFE-J3};@(mEh<_rwp^S#P@iay%?O=$8KidbVk!c`-* z!z3w2)yvGywTZsk3VSQ4-tyah`ZZqe_qmq+2oLr+UZn3^a9RbLdg+?)6W6Rf{Y#cnIi|v^`Dz)R+~nJcnmig;}wA4D{DvHk@eg z?#l?jwb|YSbpd4SDOYIM$L#c|TD2qK8moWe`z5?{mA?%neV99Aj-(fu2D_Jh>r=15 zZ2^cz(pG0`leeIeYyU?;wPM)24c#?ar{y-7dwGFTsfuZdcs>vtc~BM zg)yR3GG3Lt!yJ8NgmbPgf}NGa7o3fIB+i+^8a-a~Hr5*j`a0)6CJpJKrC95q*17K5 zz`D|D-mbB?p~L_q8r2K{S3IQmJ7ji*;|Y%SFJ4&(BXKeU?%{id?*(A$O@_hYB{c4b zg2qi+vH^Pb^WNc%k?10HRR$ixlVSR}bES81r9aDv-tqkiOfI3r2ES&Q@d)^O$K*W6 
zz2Gtgb>Gq6d!3HCA|&Ct+H%FB&&OQf2y@e|hV_fema2=(!RClMq;TcC~ z%v7DWrdxmfnj?LId+1}dKcnRw*X6N!1$W?#f%M|j%+hSnCOAD~Y$MuBDCS*7i_pi) zacjx7H?BI3T6>aa*lq|&#_X;@Txn?az2`*CjHX=uPXkR~qG$1bBfVrFaJ;X@2)U2r z88|%#6;A2-7D-zN({;vq&4_C7b<(C`KDDRb4W?W_r-j*>&y4#KTIn6M+&hV$m%!3b zzM;h+SG`9?i|7idYHn8g8BiEH_qjTr0Rvgn(TnFLFB6uCZ zp*1iagDVT*;r%L;K-rc1X)ty!DjhNjB%`u}Ew0^OaUt zoIL=dBO2%W{DBodN4VP0{9P9f0@(_9xw}Ricap6;&^A72wik@&KDGAWjDbJkt_OFoHGaU8F?_T=m_RZTp50( zm2rSMg!jPqUPJAud*xa^^MYCk?*=+$HuF$M>F%Soxh`|6qhMlO{)TU@wVLU=d@J#J z5Nvm8Gs`t)?ImOG^4mzsOnAhu`g`YaiG*zMOWS9J>U~KEywx7pn#P@*ypMrx1fx%U zN55(2cb24R7|hLmZ1Ao8YNc1`*_~P%tq%fWn?4(fXCL^yrkx&QnR)1k%u%>%8igKG z*Olc5j*METf#CfOo6y=^k=f@FFg4?8{g)c8XECd#zdB7l-yqTRoP9|<<3Qs+*NrQ* zIA^x{em#uwf|&`Q$Y6YBMroTF_R`Nxv6(6@hLN9EMJ?Lks!y!&*$Dl#_zYg&S+xy) z%!%yKdXQh*^f@@7UbtG9+D6#Qkg?=4Z?jd~jQk0_)hw-BH@%Pfr3M=_7z>Ti!>38~ zb50L-1&)32&O39XLe>f+qyKBZFX^o%8>g2!Uo~BSXP!YXKM0>MkbPxI>u2m_L~RV? ziou-tFuh;V$1}>Hx~~*pf`#(8kAzwwqJOvozkaS7^_v+VrKi?ynfBTOSFO&=J&`L$ zIRSTTxfVG#KPR@1cby^62Ep}&c4pZgF>|HLoUK{Y zX+{`@rmpUdBaeYw9Ag;zT67^{M6sMqc{a+jPP9*8+ESHUwt+!ILGK(EVCo( zkuiasGyAXq*7kgcNAlzX80K6eN;5i}qRl$cycf6^DR(Vq9#czfj>i2C?Zh+snT?kI zJ_+j`SPVm%p@h#SV=JewK|H6g+kq?IhwhG{@z*10s_%C9y&GKI*Klvbr%=d8;cWr` zjBB5v)qKF7g) zwyW$bpjs;!eeZq#jKkDwNW4oR<8Zy$88t&aM z0>KD7?5Otj_Pa;FzxDpnGw|~{WacNjc*hfptdwy*WPNMrJE5#nsaLcbUFMfp>-91j zviIGYd5Sv(mGWcmX|30zRcpeURXl2N;5C3W#ILyT%X*$pH8Z&y?{;i&e;QYP4)tod zJ|rl{{~fcq{aIe#0qGN=N)=D85BmCAqt5gEHL8xZT|$pL?V8jt&xtC}^a{GV!qXS~ zEV$;GT+viuhg3y1(+TpIpAVKR3A60^yeq5Y;<;d2nfwxFr%I^DH=_&nDSGBUsrMT; zCDNKP&vl-mved-Zr=6Vx&k=-b2KEP}`54`bnY7rTLJUZ17-i<}49_={PZCSxYOSXF>&-Y7~yJ`8|Np5`Z znCnoVESaBFV7q38bFpU`$|v;9U2h}uJ!ot1T%+97*|5X*RG+k}){2?^=t?uGQr{<$ zmf5;~Fo$IYw%H8hzQ5gzUw*#fJ9NhTCw(59m}pFcu)^$2%y{_Kj(xiO&VsWbSD)Zd)awN~UN@brR8?^6&c3I}WZe|Y|BabG{ z<0-c8Py58ilO$o{T8|K1S?8Bm@5Zf3nH(FGyMyXj7R#%Th`Y2BVYcAYIllXJK8t3YZWvDn7)G}ilhRUO;s?_@6PmHNNLKKJWo8NF_q>k?gB z6&wpq#I}l0!J%rWZ--d0|E2TJ@nIQJ`u?(6m(lI}5G;Q?NzRtwZfn#&1HFcxKccd@ 
zW2nv{b)-Byr|BuYS|XUTVtIVyA+>+;q-AsEuCk=vL*fRX5tZN3=a;Wgs^cj8?(gms zUHKIDBKvYOp3hQ$f>DzWV$nW-&)zogfZ9G|Yee#sr#r0!Z+n7^ki_;$irKW*t}nxC zi#>zO=i7gzID-o1A1FLy{!fa=KV9VU^8a4= zBs$kuf8zfL#9P{kkvVQ_V6+6^)3+C`?jnENxkZfAo5qO$)9|@;8LoBD#^f1Jz_QNC zv(dNjOW#v|neTOlnR~_>$kl3OaI6>!&YzHD_AV`-ALZQ*o{u%LPwj9YL0X07G>_q?+%9%#gzdUL_w_yi&snLfC-Hnpf&N^K%#T%ZcP*m9d!9e!?i=#H?dB@r1%AHcF*W(6@@!(fyQa(Yij{CyDY>`@1AN zaW{76nbLg%z5G9n;C^3{04saqd?&tH7aIT8psZ9m;>_K}&y+4$MY|I?W7bce2kl({ zyW^g@A})?BZ8MMajO1_3F>_#>wsJJ{CioQeG7}+g;$*%ta5cA+Wkb&q)w+(Z%#^tZ zQ5gs+Q%3@xbaD6E%Ep+b8WWPN=M!e+O`Nejakc?PEX3To2gm#_Ja8WBCv;aFxC z^=g@-Q8D7YZB?a~bI+8D)}D!xZ-eGNY0f<)so7h*Tcuw}ruU9o6)DBs;TD3n$2;cL zCFwEGFSXMDyivpCrvLF`%Q%0XuH;$S!pwK9+^vTAH~z)K-6D4@{zzMS-=tp1f)%!2 z6I*HSU7y-Vac`G5YuWT6q4m;5URv*Q2Ifk8RadHZGdz&7fIO&L&Cr>}#Ixr0yKQEj z;vv;$GP4F5tE%a8Q4MvZu&WaZi(=EVMpS!;1uGDmx?Iikg{O!-k}~NXQ+f+`*W8QD z{~4qm)))HE3Px4uP|RNL!b(f;mUEBMYM0Q~(9!OOYyj$@$w?U9PoqLumkrU0fR7p|di&)ql>&*57}0Xs1cGshzB3mX{&8b$ic@U$L*n+Ua$i zy|Q_N#A=Pgp9PiF_VBb}1GHDlt4FTDwHtcC_!lZU9rj&2E03DmSRw|gP%O2c(kg|p zszQ9nOBl*&cx>%VSK2>xoa}|e7PvDM(Oux#X&&`?HfOQVPO@66K6E8cNVlkga3&>U z$Rk8sDRehv@1?d1N$kDjEB;YO9?Yb3!53|va3V75WIOYYWcCMINO8^hv~%t@ymF8F zoTa35&Tu1=$&s4&A^ych9}#<_851=wqA6{*)GBMqymFIH!NC~)sH1FHBjmqziHG!Z zhR!h}yy|SLTBO&=oGX0oJj-I|3Zld-YE(V0iYMSiOd9JZZJBE)Giu74 z7PcDShTVt=u;^K*2u!Z60U;tPbzeKGf5|%RxmXcs8ktv=h$E`Y)}h^pEeb<#m3EpZ_!% z+U$r`?Jlu<)_vD}u1@N9BU`(T6bXk9cg$i@w2+ZID-?++z_GjVHr@+Shns1XXCVIq z4RFSC>u8CX~SzV@GRz|9^V0Cp9v2;KR2<=)~u(n;uMyw&lL;oEA zP^#4kJdDu7dWfIcS*@a^MNL7GPO&Y1JM`amfU#xdaI=0Ydx2ryGP;^pSB($UnQD$s ziOBLDw5n=Dr`)v~ZQYY|(zdPeOH1UZGcR^7B9^K>`-!VY4btmfT6=is-4$?(9>;pe zX!n5gj^cX7)j(bey#$Jptu3_j7yOpjO2Y;%!}4oQw6KmZ50*8gG!_B~g;U)%ra^InWD&PL1cIO{#-W^t-#t^9B<>R;Bu z|4A|^4A;WK{U6{;$&%d+Yn<2n(W;xaLiz7XyPOSu%#5uT4UcU<|7uj>(+={uMPjpk2<@$U0CglsA?CDP1<6bKUucxIuxLk#LG)&>VfBaE z>o?a8d9GPvixml<9(E#PAJ16%Y<9)9oiMM_mY8pzYdk43=ObOWshbhCMU9;tsb{If z&OWcTHCD1BuiiRrZ^>AfHKhLq6FsDllvXP8yu<3%A!eIt99G3r3byZpNJubGjfYP&yz3btSHmMhw=G9${ 
zzQZik3$;6Ak^2<JTvYAKoJTKx?f$kLT*AU*TBGUw2Zi^N-@C@*GjnzOCW zqA9UO6WS@*U!kDbd%s^q{bqBl2~acDetA}930$>q%?MA5fl zM8uzdI&#FN5vK;VBa*1+T#Dvbaf&siaZa?((T1Z%RhGQY5r{srX|biFw$qV2@6tJP zWDK=c5pCqTbh4jns`NaBI_6^4)uJU4jTTFtA+@j3@;=p5WW&8Ct*urs=d8Af>OFF$ zmalg+Z=(*+$|G0Xv76;x=wY$*ad>TUi+!!|R6jFP-ACPh)tV`vDHK%_3rS~=@nfrL z=1t0JQDXN>iFei5mX4p_VUaCnF;zl|p9CIz&GECvddV%0m7g2g|NY$i)n zS_EBlgjVevHmTY&;>4^YR-%Nh&)zLx$xs_Z`|PuFV;vv`!}CV$C%3oLY$A@4df6Z3 zYOe4Qw?`p=AHUWfX)Du_wU0ZNa-86KGw6|1k$TO@!AP$2da&$0<(N{7CIxuvpM^cL5cnCIk1k*lW|==r>P`$+Uc(6 zMAejRA3{r-aD=QU)gv@5vRdLe%3@Tf%AZ*MI)^_a#qup@Ekhp5?CcxuTrcMyu@4dT zXhv_^U!T2e-s)>0=rw!_n;LL;|L!DTc`o%@gOXEgQYc5}k=0(fp_02W&6cQ1wfj!$=W71WGCtLw>#WI}theJviQX`7p~s9X8SOLnSQigk0uGg)pr z&8|p8)56tv77o~BQr5$-XjeQ6t&P3L_SJx#8A$&AC)yrybdcmVemCL8jzSfpG;d}M zgNOAU3GJ%&fO$nxr*NKqXB_5hbBfB_Dh~3p9g8{_jO!xbVGLExe^fJIk>ASLG^yt} z*^spV=LBQ^;u-Nsl?qMijZ8~5 z`f`0}Udjxmkc_sgS;LZ(%BvCFb>Wz;@*QMovRWl4cx^xcF+Gr=eelc zv3|0hU*?w7O|N7fjdf5+M`xML(U~!swVQ`fSe&bK$e4EGZk1lfe>rydjvZHYxzf|; zx;mCmV*L^5!H>H9nrV|(jve`-;s@lpRL)jV%NjCKU<6FDjd9!UgEY)>ZgY&t*w4UW z^{P6r;z?r1x1Np3dU6~8hErr4_WCaVN!_TE@65p}Mx8uMXFX8zqw+b=TiUorE?!ou z1nq)jp_BT zqswAxvxU`nTRRc5Q{9oy9#oI(RijXAevH<%!QmayP7$5+6WcljWgzFa>U*WqIA|7CV|p6rWn@^5bLul~dLdhvSS~z)T=tyU$j`WI zG;==Jw^sR{?|01EvR~3WA}FO;s+4tIeJ!MZM>Mm|yj>j{$vtL=M*bKx$eqxLO!cUq zsP=UK|L(rQnR-2iG%A$wnMfh8aaZDf*hE_;yHrt2yBrY;U73&kQ8k~Xo=efXAE{&G zGmZZMgzQ0bq$FL8R!TKiP)OS<9mOZ>r7?}Z)e5kP5!)jzamd__epPR-%$mEie}*BQ z#4Ou@Jl|~h0!n+{+Q{$;{hX(8ji*BM!usS$SPBLXVG+eJ*Qp%xJ1l{;wYT%u6Wgk8 z9U~GgGzmQOER_ye$XZ)M}J4xtTvK>;RE*%+0 zT{@Z&>zs8iYQFkkR=&+hG+S@SbL|tWD`lYEcbcCbCVZbDXdi!Fx8;3vtx?>=81}~~ zR?3(yj(9IPSB;^SA-OAdvFi~tR;oH{Pp-iz(RVe27@i}vF?^y>93{&`RA8$mUndb# zH!1-9ZgRr0vV8O>Ex1cQLO%*LH9K@gN{96pVke1NNfKkLEctRWV%*S((Az4$#bVUQ z3HxMpQjH2?jo2RD$W8aTf?rC!zl@5S6m>ON&Fs~sT+HO65l^-Z&RI@n=*1@}gC)Bi zw#RcqsOJiA&u?|w1B~69A|J}5|5c`K60ew(9_qKBNnS%&ZFk&3q8>)&TRqnqx)lB( z)~0p+mc!C3b~&=wDLUbGVdQ*B)Mm?_k`0WSs=|S~Wr%nuYCv31#$Dzii$3f955FNr 
z>sevFUp=Rr=TPWu{N>r8W43Dg^zcls>FNG?jPFhZbZLwI%iSzjpbhVn4x~f=VOFTQO6@H$r@Xv+y4s)oi)2|EVs;= z9D&Bl=LlV|*2ZNjI&#Qd-MZumqPicL<*axv=N{QD>7*UVacWp@$FxmEwXvtFZCBo! zIf94sU2N5)SOv?wA_-xKip4DQ-?Vb|^@FwVxt3k{=CZDnPZ&!6z<-m;*CRTQ7GdM- zIpUQhcY4B{X%Y*w?e70&d6Obm!3Q+st+2_il$0v@l6hctTJX#GQEzYUZHy4oaX;0o zxhURAa*&YinJ~SkfZaA;sI|vF?|K_SJ>58T3)Q+r3oGTG`Q>IX5xLQ9#(E zSQFIod}u7Kq(rUm7*w==Iqc$M8chi{1EGE&C#_X0x7s|Uvv@{};{U#VHqt!oLU=N9 zP^V%qENp>%k_Pf3VoNDx?3H=p6{REFC2MK#mV7;nI&SjoEMf30Dbqf7s( zC)-5n_=H(RwkUA_0p2&~(41wSt<==kJ6`ZE?z0OS^!ecJuwrInAqRV)V0A7T^6$B( zHx9Wqeltost57<(G|q{7&N?3BU(Wweo}Fh=jc@BRnlaAb@-yPQyW44%uao}ep3Uzp z^O+V|?(^{(TGE4_sKd>^&Eu%#GJ9-Amv<-}xcvAatg9nv$I5z7%TvADSt6S}DROrkzWRr}6R964GjT@tmj_jP zc`t?gmsP$vyN7V4;L|?4a;+02{a21$?vibNquJX|68n4bHX{>U)8|IzSIM15e4?(l zOIX&0Gwx?Db}#FR(LkL;XB{cxKJPXVs=N#sQ9IBMBk0|E^mZ&W{6ft-^m1PO?uOkC zz3G>m68e`8bjvdNm*kaQ5!5LdR=J%j)$;{w&)(*r{)#TQ6g~9qqm-HCu>)$MLK(l% z>M1aTT7{~&wJRfhJ%uuERl0l|HL4H`hQ>s(YoNqb*Ec`3X{NZ8o;g-B|Df&};TV%> z-*=FKi0(SksH^qwwAO1^^)pLz?56G0LX0wgPttnG==U1f1!|Kff8kni|A7Ae9lb== zOI5Y+am{v=nP$xLjNgOT?K2MFV}!2P?li*;uZl;RQB`JQHtlOW$5-dnuIdb;WgWsk ldiNfhxVm#au1p)3xC(dIR{lqZ+ts(%uD=}T``7;5`hU&#!/bin/sh + +NEW="new_$1" +rm -rf $NEW +mkdir $NEW + +for i in `ls -1 $1` +do + convert $1/$i \ + -adaptive-resize 200x200\> \ + -size 200x200 xc:white +swap \ + -gravity center \ + -composite \ + $NEW/$i +done + +After that all the images had uniform size and colors so now I was able to compare them in a meaningfull way. + +# 3. Feature extraction + +The next step was to extract the features from the images which means to find something in the pictures which I was able to count and it would be unique enough to find a difference between cats and dogs but broad enough so all dogs would fall into one category and all cats to another. + +## 3.1. Straight lines + +The first thing which came to mind was counting and doing other stuff with straight lines in the image. 
+ +### 3.1.1 Canny edge detector + +I used the edge detector algorithm called Canny to preprocess the images, which, as the term edge detector says, finds edges in images. Because of my preparation with Photoshop it was quite easy for it to find them. It is not easy to see that step with my drawings, so here is a picture of how it looks if you do that with a photo instead: + +![Canny on a photo from Wikipedia](https://jeena.net/images/2013/catdog/canny.jpg) + +What it basically does is reduce noise with a Gaussian filter and then find the intensity gradients of the image with the help of some trigonometry. + +I didn't implement the algorithm myself, instead I used the [OpenCV implementation](http://docs.opencv.org/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.html). + +### 3.1.2 Hough transform + +To find the lines I used the [Hough transform](https://en.wikipedia.org/wiki/Hough_transform) algorithm. The red lines are those which the Hough transform algorithm found in the example picture: + +![Hough lines](https://jeena.net/images/2013/catdog/hough.png) + +What it basically does is group edges, which can be imperfect, into object candidates by performing an explicit voting procedure. Detecting straight lines can be done by describing them as y = mx + b, where m is the slope of the line and b is the intercept. The line is not represented by discrete points (x1,y1)(x2,y2) but instead as a single point (m,b) in the parameter space, which makes it possible to detect lines which are a bit off. In practice it is still more complicated, please read the [Wikipedia article](https://en.wikipedia.org/wiki/Hough_transform) about it. + +Because of lack of time I didn't implement it myself but used the probabilistic [OpenCV implementation](http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlinesp#houghlinesp). + +## 3.2. 
Line features + +I extracted these features from the lines: + +- number of lines +- average length of lines +- average angle of lines + +## 3.3. Other features + +I also extracted the number of black pixels in the image to use it as a possible feature which didn't depend on the extracted lines. + +# 4. _k_-nearest neighbor algorithm + +I chose to use the _k_-Nearest Neighbors algorithm, which only looks locally at the k nearest neighbors of the document, where k is predefined by the user. It assumes that the document is of the same category as the majority of these neighbors. + In the following figure you can see that if the user chooses k = 3, as shown by the solid line, the algorithm will conclude that the document in the center (green smiley) is of the type triangle because most of these three neighbors are triangles. If on the other hand the user chooses k = 7, as shown by the dotted line, then the number of neighbors which are rectangles is greater than the number of neighbors which are triangles, so it concludes that the smiley is of type rectangle. + +![k-Nearest Neighbours as a graphic](https://jeena.net/images/2013/catdog/k-nearest-neighbours.png) + +In the picture above you see how it would look with two dimensions. I have been using four features, so the algorithm had to check the distance to the neighbours in four dimensions. This isn't really more difficult, it is just more to calculate. + +# 5. Results + +The results were quite encouraging; I assume this is because I only used one style to draw the dogs and one style to draw the cats. + +## 5.1. k-fold Cross-validation + +I used 10-fold cross-validation for every test I did, which means that I used 90% of the available data for the learning algorithms and then the remaining 10% to test how they performed. I repeated this ten times until all data had been used for testing once. + +## 5.2. 
Results with all features + +When I used all of the features and three nearest neighbours I got an amazing 100% accuracy, which was kind of suspicious because that normally means that you most probably did something wrong. + +## 5.3. Results with a reduced feature set + +Therefore I tried to reduce the features to check if it would perform worse. + +1. When I removed the information about the number of black pixels, basically nothing happened. +2. When I removed the information about the number of lines and their average length, I at least got a couple of wrongly categorized images; the accuracy went down to 95%. +3. When I removed the information about the average angle of the lines, that was when I got significant errors. The accuracy dropped down to about 60%, which is still better than pure chance. + +So it seems like the best feature to detect cat and dog face drawings done by me was the average angle of the straight lines in the image. + +# 6. Future study + +The most important next step would be to gather many more drawings done by other people who use other styles to draw cat and dog faces. + +Then it would be interesting to use other learning algorithms like Bayes, Perceptron, etc. + +And then it would be interesting to use this approach on photos of real cats and dogs. + +# 7. 
Code + + #!/usr/bin/env python + + import cv2, cv, sys, math, os, numpy + from scipy.spatial import KDTree + + def extractFeatures(label): + + directory = "img/" + label + "/" + + features = [] + + for fn in os.listdir(directory): + + img = cv2.imread(directory + fn, 0) + + # find edges + canny = cv2.Canny(img, 50, 100) + + # find colored + black_pixels = numpy.count_nonzero(img) + + # find lines lines + lines = cv2.HoughLinesP(canny, 1, math.pi/360, 5, None, 10, 1) + + lengths = [] + angles = [] + try: + for line in lines[0]: + x1, y1, x2, y2 = line + + # Pythagoras + a2 = math.pow((x1-x2), 2) + b2 = math.pow((y1-y2), 2) + length = int(math.sqrt(a2 + b2)) + lengths.append(length) + + angle = int(math.degrees(math.atan((y1-y2) / (x1-x2)))) + angles.append(angle) + except: + pass + + # print out everything + lines_count = len(lengths) + mid_length = sum(lengths) / lines_count + mid_angle = sum(angles) / lines_count + + features.append([ + [lines_count, mid_length, mid_angle, black_pixels], + label + ]) + + return features + + + if __name__ == "__main__": + cats = extractFeatures("cat") + dogs = extractFeatures("dog") + + test_count = 5 + + test_data = dogs[:test_count] + cats[:test_count] + test_labels = map(lambda a: a[1], test_data) + test_features = map(lambda a: a[0], test_data) + + data = cats[test_count:] + dogs[test_count:] + labels = map(lambda a: a[1], data) + features = map(lambda a: a[0], data) + + tree = KDTree(features) + + for t in xrange(0, test_count * 2): + d, i = tree.query(test_features[t], k=3) + print "-" + for j in xrange(0, len(i)): + print test_labels[t] + " is a " + labels[i[j]] \ No newline at end of file