diff --git a/README.md b/README.md new file mode 100644 index 0000000..5b6c89d --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@
+Categorization of drawn cat and dog faces
+=========================================
+
+Under [ml/](ml/) you can find my attempt to do it with classical machine
+learning, where I decide which features to extract and then use a simple
+algorithm to classify.
+
+Under [tf/](tf/) you can find my attempt to do it with TensorFlow, a neural
+network which extracts its own features and whose neurons classify the
+pictures.
diff --git a/convert.sh b/convert.sh new file mode 100755 index 0000000..7e031d8 --- /dev/null +++ b/convert.sh @@ -0,0 +1,10 @@
+#!/bin/sh
+
+NEW="new_$1"
+rm -rf "$NEW"
+mkdir "$NEW"
+
+for i in `ls -1 "$1"`
+do
+	convert "$1/$i" -adaptive-resize 200x200\> -size 200x200 xc:white +swap -gravity center -composite "$NEW/$i"
+done
diff --git a/images/bg_hr.png b/images/bg_hr.png deleted file mode 100644 index 7973bd6..0000000 Binary files a/images/bg_hr.png and /dev/null differ diff --git a/images/blacktocat.png b/images/blacktocat.png deleted file mode 100644 index 6e264fe..0000000 Binary files a/images/blacktocat.png and /dev/null differ diff --git a/images/icon_download.png b/images/icon_download.png deleted file mode 100644 index a2a287f..0000000 Binary files a/images/icon_download.png and /dev/null differ diff --git a/images/sprite_download.png b/images/sprite_download.png deleted file mode 100644 index f2babd5..0000000 Binary files a/images/sprite_download.png and /dev/null differ diff --git a/img/cat/1.png b/img/cat/1.png new file mode 100644 index 0000000..f5d2fa2 Binary files /dev/null and b/img/cat/1.png differ diff --git a/img/cat/10.png b/img/cat/10.png new file mode 100644 index 0000000..d5db6d1 Binary files /dev/null and b/img/cat/10.png differ diff --git a/img/cat/11.png b/img/cat/11.png new file mode 100644 index 0000000..7ec5c71 Binary files /dev/null and b/img/cat/11.png differ diff --git a/img/cat/12.png b/img/cat/12.png new file mode 100644 index 0000000..3d6faf4 Binary files /dev/null and b/img/cat/12.png differ diff --git a/img/cat/13.png b/img/cat/13.png new file mode 100644 index 0000000..97efb02 Binary files /dev/null and b/img/cat/13.png differ diff --git a/img/cat/14.png b/img/cat/14.png new file mode 100644 index 0000000..4fed457 Binary files /dev/null and b/img/cat/14.png differ diff --git a/img/cat/15.png b/img/cat/15.png new file mode 100644 index 0000000..3ab6f91 Binary files /dev/null and b/img/cat/15.png differ diff --git a/img/cat/16.png b/img/cat/16.png new file mode 100644 index 0000000..5ce067f Binary files /dev/null and b/img/cat/16.png differ diff --git a/img/cat/17.png b/img/cat/17.png new file mode 100644 index 0000000..ccca4a3 Binary files /dev/null and b/img/cat/17.png differ diff --git a/img/cat/18.png b/img/cat/18.png new file mode 100644 index 0000000..d528bbe Binary files /dev/null and b/img/cat/18.png differ diff --git a/img/cat/19.png b/img/cat/19.png new file mode 100644 index 0000000..aff8023 Binary files /dev/null and b/img/cat/19.png differ diff --git a/img/cat/2.png b/img/cat/2.png new file mode 100644 index 0000000..81e417b Binary files /dev/null and b/img/cat/2.png differ diff --git a/img/cat/20.png b/img/cat/20.png new file mode 100644 index 0000000..fe2d584 Binary files /dev/null and b/img/cat/20.png differ diff --git a/img/cat/21.png b/img/cat/21.png new file mode 100644 index 0000000..df90a5c Binary files /dev/null and b/img/cat/21.png differ diff --git a/img/cat/22.png b/img/cat/22.png new file
mode 100644 index 0000000..1443637 Binary files /dev/null and b/img/cat/22.png differ diff --git a/img/cat/23.png b/img/cat/23.png new file mode 100644 index 0000000..37aa2db Binary files /dev/null and b/img/cat/23.png differ diff --git a/img/cat/24.png b/img/cat/24.png new file mode 100644 index 0000000..ea1a480 Binary files /dev/null and b/img/cat/24.png differ diff --git a/img/cat/25.png b/img/cat/25.png new file mode 100644 index 0000000..50ce53f Binary files /dev/null and b/img/cat/25.png differ diff --git a/img/cat/26.png b/img/cat/26.png new file mode 100644 index 0000000..6c5a51c Binary files /dev/null and b/img/cat/26.png differ diff --git a/img/cat/27.png b/img/cat/27.png new file mode 100644 index 0000000..29c263b Binary files /dev/null and b/img/cat/27.png differ diff --git a/img/cat/28.png b/img/cat/28.png new file mode 100644 index 0000000..b5f0db8 Binary files /dev/null and b/img/cat/28.png differ diff --git a/img/cat/29.png b/img/cat/29.png new file mode 100644 index 0000000..dd8f10c Binary files /dev/null and b/img/cat/29.png differ diff --git a/img/cat/3.png b/img/cat/3.png new file mode 100644 index 0000000..a3b1a53 Binary files /dev/null and b/img/cat/3.png differ diff --git a/img/cat/30.png b/img/cat/30.png new file mode 100644 index 0000000..843acb5 Binary files /dev/null and b/img/cat/30.png differ diff --git a/img/cat/31.png b/img/cat/31.png new file mode 100644 index 0000000..201a406 Binary files /dev/null and b/img/cat/31.png differ diff --git a/img/cat/32.png b/img/cat/32.png new file mode 100644 index 0000000..4c91e9c Binary files /dev/null and b/img/cat/32.png differ diff --git a/img/cat/33.png b/img/cat/33.png new file mode 100644 index 0000000..c61f711 Binary files /dev/null and b/img/cat/33.png differ diff --git a/img/cat/34.png b/img/cat/34.png new file mode 100644 index 0000000..d5b689f Binary files /dev/null and b/img/cat/34.png differ diff --git a/img/cat/35.png b/img/cat/35.png new file mode 100644 index 0000000..0f5d0de Binary files /dev/null and b/img/cat/35.png differ diff --git a/img/cat/36.png b/img/cat/36.png new file mode 100644 index 0000000..2dccd69 Binary files /dev/null and b/img/cat/36.png differ diff --git a/img/cat/37.png b/img/cat/37.png new file mode 100644 index 0000000..8d03b22 Binary files /dev/null and b/img/cat/37.png differ diff --git a/img/cat/38.png b/img/cat/38.png new file mode 100644 index 0000000..612dac4 Binary files /dev/null and b/img/cat/38.png differ diff --git a/img/cat/39.png b/img/cat/39.png new file mode 100644 index 0000000..9d35a1e Binary files /dev/null and b/img/cat/39.png differ diff --git a/img/cat/4.png b/img/cat/4.png new file mode 100644 index 0000000..4c086da Binary files /dev/null and b/img/cat/4.png differ diff --git a/img/cat/40.png b/img/cat/40.png new file mode 100644 index 0000000..a19812c Binary files /dev/null and b/img/cat/40.png differ diff --git a/img/cat/41.png b/img/cat/41.png new file mode 100644 index 0000000..69a98b0 Binary files /dev/null and b/img/cat/41.png differ diff --git a/img/cat/42.png b/img/cat/42.png new file mode 100644 index 0000000..df116d8 Binary files /dev/null and b/img/cat/42.png differ diff --git a/img/cat/43.png b/img/cat/43.png new file mode 100644 index 0000000..bdd369c Binary files /dev/null and b/img/cat/43.png differ diff --git a/img/cat/44.png b/img/cat/44.png new file mode 100644 index 0000000..1412a5a Binary files /dev/null and b/img/cat/44.png differ diff --git a/img/cat/45.png b/img/cat/45.png new file mode 100644 index 0000000..d66018c Binary files /dev/null 
and b/img/cat/45.png differ diff --git a/img/cat/46.png b/img/cat/46.png new file mode 100644 index 0000000..cccbdfe Binary files /dev/null and b/img/cat/46.png differ diff --git a/img/cat/47.png b/img/cat/47.png new file mode 100644 index 0000000..54a6aed Binary files /dev/null and b/img/cat/47.png differ diff --git a/img/cat/48.png b/img/cat/48.png new file mode 100644 index 0000000..42e850f Binary files /dev/null and b/img/cat/48.png differ diff --git a/img/cat/49.png b/img/cat/49.png new file mode 100644 index 0000000..f8cfda4 Binary files /dev/null and b/img/cat/49.png differ diff --git a/img/cat/5.png b/img/cat/5.png new file mode 100644 index 0000000..e77bc3a Binary files /dev/null and b/img/cat/5.png differ diff --git a/img/cat/50.png b/img/cat/50.png new file mode 100644 index 0000000..376b471 Binary files /dev/null and b/img/cat/50.png differ diff --git a/img/cat/51.png b/img/cat/51.png new file mode 100644 index 0000000..9b89c58 Binary files /dev/null and b/img/cat/51.png differ diff --git a/img/cat/52.png b/img/cat/52.png new file mode 100644 index 0000000..e2f3a11 Binary files /dev/null and b/img/cat/52.png differ diff --git a/img/cat/54.png b/img/cat/54.png new file mode 100644 index 0000000..e7ac183 Binary files /dev/null and b/img/cat/54.png differ diff --git a/img/cat/55.png b/img/cat/55.png new file mode 100644 index 0000000..27c3a6a Binary files /dev/null and b/img/cat/55.png differ diff --git a/img/cat/56.png b/img/cat/56.png new file mode 100644 index 0000000..f6ede4b Binary files /dev/null and b/img/cat/56.png differ diff --git a/img/cat/57.png b/img/cat/57.png new file mode 100644 index 0000000..921f054 Binary files /dev/null and b/img/cat/57.png differ diff --git a/img/cat/58.png b/img/cat/58.png new file mode 100644 index 0000000..7a59712 Binary files /dev/null and b/img/cat/58.png differ diff --git a/img/cat/59.png b/img/cat/59.png new file mode 100644 index 0000000..ec6ba49 Binary files /dev/null and b/img/cat/59.png differ diff --git a/img/cat/6.png b/img/cat/6.png new file mode 100644 index 0000000..7e0112e Binary files /dev/null and b/img/cat/6.png differ diff --git a/img/cat/60.png b/img/cat/60.png new file mode 100644 index 0000000..bf4a0c8 Binary files /dev/null and b/img/cat/60.png differ diff --git a/img/cat/61.png b/img/cat/61.png new file mode 100644 index 0000000..553a327 Binary files /dev/null and b/img/cat/61.png differ diff --git a/img/cat/62.png b/img/cat/62.png new file mode 100644 index 0000000..952732f Binary files /dev/null and b/img/cat/62.png differ diff --git a/img/cat/63.png b/img/cat/63.png new file mode 100644 index 0000000..793a5f4 Binary files /dev/null and b/img/cat/63.png differ diff --git a/img/cat/64.png b/img/cat/64.png new file mode 100644 index 0000000..b37c608 Binary files /dev/null and b/img/cat/64.png differ diff --git a/img/cat/65.png b/img/cat/65.png new file mode 100644 index 0000000..8410e61 Binary files /dev/null and b/img/cat/65.png differ diff --git a/img/cat/7.png b/img/cat/7.png new file mode 100644 index 0000000..1f7c5da Binary files /dev/null and b/img/cat/7.png differ diff --git a/img/cat/8.png b/img/cat/8.png new file mode 100644 index 0000000..f48b06d Binary files /dev/null and b/img/cat/8.png differ diff --git a/img/cat/9.png b/img/cat/9.png new file mode 100644 index 0000000..da7634b Binary files /dev/null and b/img/cat/9.png differ diff --git a/img/dog/1.png b/img/dog/1.png new file mode 100644 index 0000000..aea3814 Binary files /dev/null and b/img/dog/1.png differ diff --git a/img/dog/10.png b/img/dog/10.png 
new file mode 100644 index 0000000..8d79e2b Binary files /dev/null and b/img/dog/10.png differ diff --git a/img/dog/11.png b/img/dog/11.png new file mode 100644 index 0000000..6f1e0e1 Binary files /dev/null and b/img/dog/11.png differ diff --git a/img/dog/12.png b/img/dog/12.png new file mode 100644 index 0000000..aa4ef1d Binary files /dev/null and b/img/dog/12.png differ diff --git a/img/dog/13.png b/img/dog/13.png new file mode 100644 index 0000000..a6b84bb Binary files /dev/null and b/img/dog/13.png differ diff --git a/img/dog/14.png b/img/dog/14.png new file mode 100644 index 0000000..81a7e2b Binary files /dev/null and b/img/dog/14.png differ diff --git a/img/dog/15.png b/img/dog/15.png new file mode 100644 index 0000000..69aaca4 Binary files /dev/null and b/img/dog/15.png differ diff --git a/img/dog/16.png b/img/dog/16.png new file mode 100644 index 0000000..ec4911e Binary files /dev/null and b/img/dog/16.png differ diff --git a/img/dog/17.png b/img/dog/17.png new file mode 100644 index 0000000..50a5d18 Binary files /dev/null and b/img/dog/17.png differ diff --git a/img/dog/18.png b/img/dog/18.png new file mode 100644 index 0000000..4f4d834 Binary files /dev/null and b/img/dog/18.png differ diff --git a/img/dog/19.png b/img/dog/19.png new file mode 100644 index 0000000..ebc4cb7 Binary files /dev/null and b/img/dog/19.png differ diff --git a/img/dog/2.png b/img/dog/2.png new file mode 100644 index 0000000..ffa6fc0 Binary files /dev/null and b/img/dog/2.png differ diff --git a/img/dog/20.png b/img/dog/20.png new file mode 100644 index 0000000..e94c52b Binary files /dev/null and b/img/dog/20.png differ diff --git a/img/dog/21.png b/img/dog/21.png new file mode 100644 index 0000000..b2eb6c3 Binary files /dev/null and b/img/dog/21.png differ diff --git a/img/dog/22.png b/img/dog/22.png new file mode 100644 index 0000000..c55cc3f Binary files /dev/null and b/img/dog/22.png differ diff --git a/img/dog/23.png b/img/dog/23.png new file mode 100644 index 0000000..4dd14c1 Binary files /dev/null and b/img/dog/23.png differ diff --git a/img/dog/24.png b/img/dog/24.png new file mode 100644 index 0000000..14c919a Binary files /dev/null and b/img/dog/24.png differ diff --git a/img/dog/25.png b/img/dog/25.png new file mode 100644 index 0000000..2f4ee1b Binary files /dev/null and b/img/dog/25.png differ diff --git a/img/dog/26.png b/img/dog/26.png new file mode 100644 index 0000000..0d70713 Binary files /dev/null and b/img/dog/26.png differ diff --git a/img/dog/27.png b/img/dog/27.png new file mode 100644 index 0000000..cef11c5 Binary files /dev/null and b/img/dog/27.png differ diff --git a/img/dog/28.png b/img/dog/28.png new file mode 100644 index 0000000..6039765 Binary files /dev/null and b/img/dog/28.png differ diff --git a/img/dog/29.png b/img/dog/29.png new file mode 100644 index 0000000..55659a8 Binary files /dev/null and b/img/dog/29.png differ diff --git a/img/dog/3.png b/img/dog/3.png new file mode 100644 index 0000000..ed9ba37 Binary files /dev/null and b/img/dog/3.png differ diff --git a/img/dog/30.png b/img/dog/30.png new file mode 100644 index 0000000..84f258a Binary files /dev/null and b/img/dog/30.png differ diff --git a/img/dog/31.png b/img/dog/31.png new file mode 100644 index 0000000..7ad6c5d Binary files /dev/null and b/img/dog/31.png differ diff --git a/img/dog/32.png b/img/dog/32.png new file mode 100644 index 0000000..00cf110 Binary files /dev/null and b/img/dog/32.png differ diff --git a/img/dog/33.png b/img/dog/33.png new file mode 100644 index 0000000..8b72422 Binary files 
/dev/null and b/img/dog/33.png differ diff --git a/img/dog/34.png b/img/dog/34.png new file mode 100644 index 0000000..4d15bd4 Binary files /dev/null and b/img/dog/34.png differ diff --git a/img/dog/35.png b/img/dog/35.png new file mode 100644 index 0000000..e433d3e Binary files /dev/null and b/img/dog/35.png differ diff --git a/img/dog/36.png b/img/dog/36.png new file mode 100644 index 0000000..d8068d2 Binary files /dev/null and b/img/dog/36.png differ diff --git a/img/dog/37.png b/img/dog/37.png new file mode 100644 index 0000000..db6b481 Binary files /dev/null and b/img/dog/37.png differ diff --git a/img/dog/38.png b/img/dog/38.png new file mode 100644 index 0000000..db0eef2 Binary files /dev/null and b/img/dog/38.png differ diff --git a/img/dog/39.png b/img/dog/39.png new file mode 100644 index 0000000..4a0184e Binary files /dev/null and b/img/dog/39.png differ diff --git a/img/dog/4.png b/img/dog/4.png new file mode 100644 index 0000000..bd6de71 Binary files /dev/null and b/img/dog/4.png differ diff --git a/img/dog/40.png b/img/dog/40.png new file mode 100644 index 0000000..604d295 Binary files /dev/null and b/img/dog/40.png differ diff --git a/img/dog/41.png b/img/dog/41.png new file mode 100644 index 0000000..a98a2ce Binary files /dev/null and b/img/dog/41.png differ diff --git a/img/dog/43.png b/img/dog/43.png new file mode 100644 index 0000000..63cdd31 Binary files /dev/null and b/img/dog/43.png differ diff --git a/img/dog/44.png b/img/dog/44.png new file mode 100644 index 0000000..69ea750 Binary files /dev/null and b/img/dog/44.png differ diff --git a/img/dog/45.png b/img/dog/45.png new file mode 100644 index 0000000..29b40fa Binary files /dev/null and b/img/dog/45.png differ diff --git a/img/dog/46.png b/img/dog/46.png new file mode 100644 index 0000000..b58b32e Binary files /dev/null and b/img/dog/46.png differ diff --git a/img/dog/47.png b/img/dog/47.png new file mode 100644 index 0000000..7684a17 Binary files /dev/null and b/img/dog/47.png differ diff --git a/img/dog/48.png b/img/dog/48.png new file mode 100644 index 0000000..75b8b6f Binary files /dev/null and b/img/dog/48.png differ diff --git a/img/dog/49.png b/img/dog/49.png new file mode 100644 index 0000000..fe738c9 Binary files /dev/null and b/img/dog/49.png differ diff --git a/img/dog/5.png b/img/dog/5.png new file mode 100644 index 0000000..9db7f19 Binary files /dev/null and b/img/dog/5.png differ diff --git a/img/dog/50.png b/img/dog/50.png new file mode 100644 index 0000000..cbd10cd Binary files /dev/null and b/img/dog/50.png differ diff --git a/img/dog/51.png b/img/dog/51.png new file mode 100644 index 0000000..b823d74 Binary files /dev/null and b/img/dog/51.png differ diff --git a/img/dog/52.png b/img/dog/52.png new file mode 100644 index 0000000..1a00fad Binary files /dev/null and b/img/dog/52.png differ diff --git a/img/dog/53.png b/img/dog/53.png new file mode 100644 index 0000000..7089f5d Binary files /dev/null and b/img/dog/53.png differ diff --git a/img/dog/54.png b/img/dog/54.png new file mode 100644 index 0000000..4581dcc Binary files /dev/null and b/img/dog/54.png differ diff --git a/img/dog/55.png b/img/dog/55.png new file mode 100644 index 0000000..c9192dd Binary files /dev/null and b/img/dog/55.png differ diff --git a/img/dog/56.png b/img/dog/56.png new file mode 100644 index 0000000..1b0267d Binary files /dev/null and b/img/dog/56.png differ diff --git a/img/dog/57.png b/img/dog/57.png new file mode 100644 index 0000000..9a27022 Binary files /dev/null and b/img/dog/57.png differ diff --git 
a/img/dog/58.png b/img/dog/58.png new file mode 100644 index 0000000..1cfba41 Binary files /dev/null and b/img/dog/58.png differ diff --git a/img/dog/59.png b/img/dog/59.png new file mode 100644 index 0000000..0c40a55 Binary files /dev/null and b/img/dog/59.png differ diff --git a/img/dog/6.png b/img/dog/6.png new file mode 100644 index 0000000..c3d0b81 Binary files /dev/null and b/img/dog/6.png differ diff --git a/img/dog/60.png b/img/dog/60.png new file mode 100644 index 0000000..e069f57 Binary files /dev/null and b/img/dog/60.png differ diff --git a/img/dog/61.png b/img/dog/61.png new file mode 100644 index 0000000..4269f2d Binary files /dev/null and b/img/dog/61.png differ diff --git a/img/dog/62.png b/img/dog/62.png new file mode 100644 index 0000000..57f858c Binary files /dev/null and b/img/dog/62.png differ diff --git a/img/dog/63.png b/img/dog/63.png new file mode 100644 index 0000000..e80c861 Binary files /dev/null and b/img/dog/63.png differ diff --git a/img/dog/64.png b/img/dog/64.png new file mode 100644 index 0000000..9ddf64a Binary files /dev/null and b/img/dog/64.png differ diff --git a/img/dog/7.png b/img/dog/7.png new file mode 100644 index 0000000..6922a37 Binary files /dev/null and b/img/dog/7.png differ diff --git a/img/dog/8.png b/img/dog/8.png new file mode 100644 index 0000000..b156b3a Binary files /dev/null and b/img/dog/8.png differ diff --git a/img/dog/9.png b/img/dog/9.png new file mode 100644 index 0000000..e241d0b Binary files /dev/null and b/img/dog/9.png differ diff --git a/index.html b/index.html deleted file mode 100644 index 46c41c1..0000000 --- a/index.html +++ /dev/null @@ -1,279 +0,0 @@
diff --git a/javascripts/main.js b/javascripts/main.js deleted file mode 100644 index d8135d3..0000000 --- a/javascripts/main.js +++ /dev/null @@ -1 +0,0 @@
-console.log('This would be the main JS file.');
diff --git a/ml/Presentation-Swedish.odp b/ml/Presentation-Swedish.odp new file mode 100644 index 0000000..b3f14a1 Binary files /dev/null and b/ml/Presentation-Swedish.odp differ
diff --git a/ml/README.md b/ml/README.md new file mode 100644 index 0000000..ba8af1c --- /dev/null +++ b/ml/README.md @@ -0,0 +1,210 @@
+# 1. Introduction
+
+## 1.1. Goal
+
+The goal of this project was to give a computer a drawing of either a cat's or a dog's face and let it recognize, with high probability, whether a cat or a dog is shown.
+
+## 1.2. Scope
+
+First I thought that I would get lots of people to draw cat and dog faces for me, but I later realized that this was far too time-consuming. Therefore I had to change the scope from recognizing random people's drawings to recognizing my own drawings, which is obviously easier. Everything else stayed much the same; I would just get better results.
+
+# 2. Preparation
+
+## 2.1. Drawing and taking a photo
+
+![The raw drawings](https://jeena.net/images/2013/catdog/drawing-taking-photo.jpg)
+
+I drew eight A4 sheets of such cat and dog faces, which resulted in 64 dog faces and 60 cat faces. Then I took pictures of them with my digital camera.
+
+There was a huge difference in quality between the pictures I took with my iPhone 4 camera and the ones I took with my Nikon D5000. In fact I was not able to use the pictures I took with the iPhone, because it was impossible to find straight lines in them.
+
+You can see the result here, one with the iPhone image as a source and the other with the Nikon image:
+
+![iPhone vs. Nikon sample](https://jeena.net/images/2013/catdog/iphone-sample.jpg)
+
+## 2.2. Photoshop
+
+I cleaned up the drawings so it would be easier for the algorithm to find everything. I opened the pictures of the drawings in Photoshop and played with the contrast and brightness settings.
+
+Then I cut out all the individual drawings from the big image and saved them as black-and-white PNG images without dither.
+
+![Steps in Photoshop](https://jeena.net/images/2013/catdog/photoshop.jpg)
+
+## 2.3. Resizing
+
+I wrote a small shell script which takes all pictures and resizes them proportionally to a maximum width and height of 200 px. It also fills the missing borders with a white background color. To do that I used the [ImageMagick](https://www.imagemagick.org) software suite:
+
+<pre>#!/bin/sh
+
+NEW="new_$1"
+rm -rf "$NEW"
+mkdir "$NEW"
+
+for i in `ls -1 "$1"`
+do
+	convert "$1/$i" \
+		-adaptive-resize 200x200\> \
+		-size 200x200 xc:white +swap \
+		-gravity center \
+		-composite \
+		"$NEW/$i"
+done</pre>
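+
+As a usage sketch (the directory names are an assumption for illustration, not part of the script): called with a directory of cut-out drawings, the script writes the normalized 200x200 copies into a sibling directory with a `new_` prefix:
+
+    sh convert.sh cat    # reads cat/1.png, ... and writes new_cat/1.png, ...
+    sh convert.sh dog    # same for the dog drawings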
+
+After that, all the images had a uniform size and color, so I was able to compare them in a meaningful way.
+
+# 3. Feature extraction
+
+The next step was to extract features from the images. In other words, to find things in the pictures that are distinctive enough to tell cats and dogs apart, but broad enough that all dogs fall into one category and all cats into the other.
+
+## 3.1. Straight lines
+
+The first idea that came to mind was to count and analyze the straight lines in the image.
+
+### 3.1.1 Canny edge detector
+
+To preprocess the images I used an edge detection algorithm called Canny which, as the name implies, finds edges in images. Because of my preparation in Photoshop it was quite easy for it to find them. This step is hard to see on my drawings, so here is a picture of what it looks like when you apply it to a photo instead:
+
+![Canny on a photo from Wikipedia](https://jeena.net/images/2013/catdog/canny.jpg)
+
+It basically removes noise with a Gaussian filter and then finds the intensity gradients of the image with the help of some trigonometry.
+
+I did not implement the algorithm myself; instead I used the widely used [OpenCV implementation](http://docs.opencv.org/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.html).
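+
+The whole preprocessing step is a single call in OpenCV's Python bindings; the two hysteresis thresholds, 50 and 100, are the values the script in section 7 uses:
+
+    import cv2
+
+    img = cv2.imread("img/cat/1.png", 0)  # 0 loads the image as grayscale
+    edges = cv2.Canny(img, 50, 100)       # edge pixels become white, everything else black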
+
+### 3.1.2 Hough transform
+
+To find the lines I used the [Hough transform](https://en.wikipedia.org/wiki/Hough_transform) algorithm. The red lines are those which the Hough transform found in the example picture:
+
+![Hough lines](https://jeena.net/images/2013/catdog/hough.png)
+
+It essentially groups edges, which can be imperfect, into object candidates by performing an explicit voting procedure. A straight line can be described as y = mx + b, where m is the slope of the line and b is the intercept. The line is then represented not by discrete points (x1,y1), (x2,y2) but as a single point (m,b) in the parameter space, which makes it possible to also detect lines that are a bit off. In practice it is more complicated; for example, an angle-distance parametrization is used instead, because a vertical line would need an infinite slope m. Please read the [Wikipedia article](https://en.wikipedia.org/wiki/Hough_transform) for the details.
+
+I did not implement it myself but used the well-tested probabilistic [OpenCV implementation](http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlinesp#houghlinesp).
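+
+This is what that call looks like with the parameters the script in section 7 uses (1 px distance resolution, half a degree of angle resolution, accumulator threshold 5, minimum line length 10 px, maximum gap 1 px); `edges` is the Canny output from the sketch above:
+
+    import math
+
+    lines = cv2.HoughLinesP(edges, 1, math.pi / 360, 5, None, 10, 1)
+    # the OpenCV 2.x bindings pack the segments into lines[0], one (x1, y1, x2, y2) each
+    if lines is not None:
+        for x1, y1, x2, y2 in lines[0]:
+            print((x1, y1), (x2, y2))  # the two endpoints of one detected segment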
+
+## 3.2. Line features
+
+I extracted these features from the lines:
+
+- number of lines
+- average length of the lines
+- average angle of the lines
+
+## 3.3. Other features
+
+I also extracted the number of black pixels in the image, to have one candidate feature which does not depend on the extracted lines.
+
+# 4. _k_-nearest neighbor algorithm
+
+I chose to use the _k_-Nearest Neighbors algorithm, which looks only locally at the k nearest neighbors of the document to classify, where k is a number predefined by the user. It assumes that the document belongs to the category to which the majority of these k neighbors belong.
+In the following figure you can see that if the user chooses k = 3, as shown by the solid line, the algorithm concludes that the document in the center (the green smiley) is of type triangle, because most of these three neighbors are triangles. If on the other hand the user chooses k = 7, as shown by the dotted line, the number of neighbors which are rectangles is greater than the number of neighbors which are triangles, so it concludes that the smiley is of type rectangle.
+
+![k-Nearest Neighbours as a graphic](https://jeena.net/images/2013/catdog/k-nearest-neighbours.png)
+
+The picture above shows how it looks with two dimensions. I used four features, so the algorithm had to check the distance to the neighbors in four dimensions. This is not really more difficult, there is just more to calculate.
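+
+As a minimal sketch of the classification step (the names are illustrative, not the project's evaluation code), the majority vote over the k nearest feature vectors can be done with the same SciPy KDTree the script in section 7 uses:
+
+    from collections import Counter
+    from scipy.spatial import KDTree
+
+    def classify(train_features, train_labels, sample, k=3):
+        tree = KDTree(train_features)       # index the training feature vectors
+        d, idx = tree.query(sample, k=k)    # distances and indices of the k nearest
+        votes = [train_labels[i] for i in idx]
+        return Counter(votes).most_common(1)[0][0]  # the majority label wins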
+
+# 5. Results
+
+The results were quite encouraging; I assume that is because I used only one style to draw the dogs and one style to draw the cats.
+
+## 5.1. k-fold Cross-validation
+
+I used 10-fold cross-validation for every test I did, which means that I used 90% of the available data to train the learning algorithm and the remaining 10% to test how it performed. I repeated this ten times, until all data had been used for testing once.
+
+## 5.2. Results with all features
+
+When I used all of the features and three nearest neighbors I got an amazing 100% accuracy, which was suspicious, because that normally means you most probably did something wrong.
+
+## 5.3. Results with a reduced feature set
+
+Therefore I tried to reduce the features to check whether it would perform worse.
+
+1. When I removed the information about the number of black pixels, basically nothing happened.
+2. When I removed the information about the number of lines and the average length, I at least got a couple of wrongly categorized images; the accuracy went down to 95%.
+3. When I removed the information about the average angle of the lines, I got significant errors. The accuracy dropped to about 60%, which is still better than pure chance.
+
+So it seems that the best feature for detecting cat and dog face drawings done by me was the average angle of the straight lines in the image.
+
+# 6. Future study
+
+The most important next step would be to gather many more drawings done by other people, who use other styles to draw cat and dog faces.
+
+Then it would be interesting to try other learning algorithms, like Naive Bayes, the perceptron, etc.
+
+And then it would be interesting to use this approach on photos of real cats and dogs.
+
+# 7. Code
+
+The full feature extraction and classification script is checked in next to this README as [classify.py](classify.py).
diff --git a/ml/classify.py b/ml/classify.py new file mode 100755 index 0000000..3168d70 --- /dev/null +++ b/ml/classify.py @@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+import cv2, math, os, numpy
+from scipy.spatial import KDTree
+
+def extractFeatures(label):
+
+    directory = "img/" + label + "/"
+
+    features = []
+
+    for fn in os.listdir(directory):
+
+        img = cv2.imread(directory + fn, 0)
+
+        # find edges
+        canny = cv2.Canny(img, 50, 100)
+
+        # count the black (drawn) pixels; the grayscale image is white
+        # where there is paper, so the black pixels are the zero entries
+        black_pixels = img.size - numpy.count_nonzero(img)
+
+        # find straight lines in the edge image
+        lines = cv2.HoughLinesP(canny, 1, math.pi/360, 5, None, 10, 1)
+
+        lengths = []
+        angles = []
+        if lines is not None:
+            for line in lines[0]:
+                x1, y1, x2, y2 = line
+
+                # Pythagoras
+                a2 = math.pow((x1-x2), 2)
+                b2 = math.pow((y1-y2), 2)
+                length = int(math.sqrt(a2 + b2))
+                lengths.append(length)
+
+                # slope angle of the line; a vertical line has no finite slope
+                if x1 == x2:
+                    angle = 90
+                else:
+                    angle = int(math.degrees(math.atan(float(y1-y2) / (x1-x2))))
+                angles.append(angle)
+
+        # aggregate everything into one feature vector per image
+        lines_count = len(lengths)
+        mid_length = sum(lengths) / lines_count if lines_count else 0
+        mid_angle = sum(angles) / lines_count if lines_count else 0
+
+        features.append([
+            [lines_count, mid_length, mid_angle, black_pixels],
+            label
+        ])
+
+    return features
+
+
+if __name__ == "__main__":
+    cats = extractFeatures("cat")
+    dogs = extractFeatures("dog")
+
+    test_count = 5
+
+    test_data = dogs[:test_count] + cats[:test_count]
+    test_labels = map(lambda a: a[1], test_data)
+    test_features = map(lambda a: a[0], test_data)
+
+    data = cats[test_count:] + dogs[test_count:]
+    labels = map(lambda a: a[1], data)
+    features = map(lambda a: a[0], data)
+
+    tree = KDTree(features)
+
+    for t in xrange(0, test_count * 2):
+        d, i = tree.query(test_features[t], k=3)
+        print "-"
+        for j in xrange(0, len(i)):
+            print test_labels[t] + " is predicted to be a " + labels[i[j]]
diff --git a/ml/report.md b/ml/report.md new file mode 100644 index 0000000..ba8af1c --- /dev/null +++ b/ml/report.md @@ -0,0 +1,210 @@
Code + + #!/usr/bin/env python + + import cv2, cv, sys, math, os, numpy + from scipy.spatial import KDTree + + def extractFeatures(label): + + directory = "img/" + label + "/" + + features = [] + + for fn in os.listdir(directory): + + img = cv2.imread(directory + fn, 0) + + # find edges + canny = cv2.Canny(img, 50, 100) + + # find colored + black_pixels = numpy.count_nonzero(img) + + # find lines lines + lines = cv2.HoughLinesP(canny, 1, math.pi/360, 5, None, 10, 1) + + lengths = [] + angles = [] + try: + for line in lines[0]: + x1, y1, x2, y2 = line + + # Pythagoras + a2 = math.pow((x1-x2), 2) + b2 = math.pow((y1-y2), 2) + length = int(math.sqrt(a2 + b2)) + lengths.append(length) + + angle = int(math.degrees(math.atan((y1-y2) / (x1-x2)))) + angles.append(angle) + except: + pass + + # print out everything + lines_count = len(lengths) + mid_length = sum(lengths) / lines_count + mid_angle = sum(angles) / lines_count + + features.append([ + [lines_count, mid_length, mid_angle, black_pixels], + label + ]) + + return features + + + if __name__ == "__main__": + cats = extractFeatures("cat") + dogs = extractFeatures("dog") + + test_count = 5 + + test_data = dogs[:test_count] + cats[:test_count] + test_labels = map(lambda a: a[1], test_data) + test_features = map(lambda a: a[0], test_data) + + data = cats[test_count:] + dogs[test_count:] + labels = map(lambda a: a[1], data) + features = map(lambda a: a[0], data) + + tree = KDTree(features) + + for t in xrange(0, test_count * 2): + d, i = tree.query(test_features[t], k=3) + print "-" + for j in xrange(0, len(i)): + print test_labels[t] + " is a " + labels[i[j]] diff --git a/params.json b/params.json deleted file mode 100644 index 96c81bc..0000000 --- a/params.json +++ /dev/null @@ -1 +0,0 @@ -{"name":"Cat vs. dog drawings categorization","tagline":"A university project","body":"# 1. Introduction\r\n\r\n## 1.1. Goal\r\n\r\nThe goal of this project was to give a computer a drawing of either a cat's or a dog's face and let it recognize with high probability whether a cat or a dog is shown.\r\n\r\n## 1.2. Scope\r\n\r\nFirst I thought that I would get lots of people to draw cat and dog faces for me, but I later realized that it was far too time consuming. Therefore I had to change the scope from recognizing random peoples drawings to recognizing my own drawings, which is obviously easier. Everything else did not change that much, I would just get better results.\r\n\r\n# 2. Preparation\r\n\r\n## 2.1. Drawing and taking a photo\r\n\r\n![The raw drawings](https://jeena.net/images/2013/catdog/drawing-taking-photo.jpg)\r\n\r\nI drew eight A4 sheets of such cat and dog faces which resulted in 64 dog faces and 60 cat faces. Then I took pictures of them with my digital camera.\r\n\r\nThere was a huge difference in quality between the pictures I took with my iPhone 4 camera and the ones I took with my Nikon D5000. In fact I was not able to use the pictures I took with the iPhone because it was impossible to find straight lines in them.\r\n\r\nYou can see the result here, one with the iPhone image as a source and the other with the Nikon image:\r\n\r\n![iPhone vs. Nikon sample](https://jeena.net/images/2013/catdog/iphone-sample.jpg)\r\n\r\n## 2.2. Photoshop\r\n\r\nI cleaned up the drawings so it would be easier for the algorithm to find everything. 
I opened the pictures of the drawings in Photoshop and played with the contrast and brightness settings.\r\n\r\nThen I cut out all the drawings from the big image and saved them as a black and white PNG images without dither.\r\n\r\n![Steps in Photoshop](https://jeena.net/images/2013/catdog/photoshop.jpg)\r\n\r\n## 2.3. Resizing\r\n\r\nI wrote a small shellscript which would take all pictures and resize them proportionally to a max width and height of 200 px. It also fills up the missing borders with a white background color. To do that I used the [ImageMagick](www.imagemagick.org) software suite:\r\n\r\n
#!/bin/sh\r\n\r\nNEW=\"new_$1\"\r\nrm -rf $NEW\r\nmkdir $NEW\r\n\r\nfor i in `ls -1 $1`\r\ndo\r\n\tconvert $1/$i \\\r\n\t\t-adaptive-resize 200x200\\> \\\r\n\t\t-size 200x200 xc:white +swap \\\r\n\t\t-gravity center \\\r\n\t\t-composite \\\r\n\t\t$NEW/$i\r\ndone
\r\n\r\nAfter that all the images had uniform sizes and colors so that I was able to compare them in a meaningful way.\r\n\r\n# 3. Feature extraction\r\n\r\nThe next step was to extract the features from the images. In other words find things in the pictures that would be unique enough to make a difference between cats and dogs but broad enough so that all dogs would fall into one category and all cats into the other.\r\n\r\n## 3.1. Straight lines\r\n\r\nThe first thing which came to mind was counting and doing other stuff with straight lines in the image.\r\n\r\n### 3.1.1 Canny edge detector\r\n\r\nI used an edge detector algorithm called Canny to preprocess the images which - as the name implies - finds edges in images. Because of my preparation with Photoshop it was quite easy for it to find them. It is not easy to see this step with my drawings, so here is a picture of how it looks like when you do this with a photo instead:\r\n\r\n![Canny on a photo from Wikipedia](https://jeena.net/images/2013/catdog/canny.jpg)\r\n\r\nIt basically removes noise with a gausian filter and then finds the intentisty gradians of the image with help of some trigonometry.\r\n\r\nI did not implement the algorithm myself, instead I used the often used [OpenCV implementation](http://docs.opencv.org/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.html).\r\n\r\n### 3.1.2 Hough transform\r\n\r\nTo find the lines I used the [Hough transform](https://en.wikipedia.org/wiki/Hough_transform) algorithm. The red lines are those which the Hough transform algorithm found in the example picture:\r\n\r\n![Hough lines](https://jeena.net/images/2013/catdog/hough.png)\r\n\r\nIt essentially groups edges, which can be imperfect, to object candidates by performing an explicit voting procedure. Detecting straight lines can be done by describing them as y = mx + b where m is the slope of the line and b is the intercept. The line is not represented by descrete points (x1,y1)(x2,y2) but instead as a point(x,y) in the parameter space, which makes detection of lines, which are a bit off, possible. In practice it is still more complicated, please read the [Wikipedia article](https://en.wikipedia.org/wiki/Hough_transform) about it.\r\n\r\nI did not implement it myself but used the often used and tested probabilistic [OpenCV implementation](http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlinesp#houghlinesp).\r\n\r\n## 3.2. Line features\r\n\r\nI extracted these features from the lines:\r\n\r\n- amount of lines\r\n- average length of lines\r\n- average angle of lines\r\n\r\n## 3.3. Other features\r\n\r\nI also extracted the amount of black pixels in the image to use it as a possible feature which was not using the extracted lines.\r\n\r\n# 4. _k_-nearest neighbor algorithm\r\n\r\nI chose to use the _k_-Nearest Neighbors algorithm which only locally looks at the neighbors of the document in a radius predefined by the user. It assumes that the document is of the same category as the highest number of neighbors within this radius.\r\nIn the following figure you can see that depending if the user choses k = 3, as shown by the solid line, the algorithm will conclude that the document in the center (green smiley) is of the type triangle because most of this three neighbors are triangles. 
If on the other hand the user choses k = 7, as shown by the dotted line, then the amount of neighbors which are rectangles is greater as the amount of neighbors which are triangles, so it concludes that the smiley is of type rectangle.\r\n\r\n![k-Nearest Neighbours as a graphic](https://jeena.net/images/2013/catdog/k-nearest-neighbours.png)\r\n\r\nIn the picture above you see how it would look with two dimensions. I have been using four features so the algorithm had to check the distance to the neighbours in four dimensions. This is not really more difficult, it is just more to calculate.\r\n\r\n# 5. Results\r\n\r\nThe results were quite encouraging, I assume it is because I only used one style to draw the dogs and one style to draw the cats.\r\n\r\n## 5.1. k-fold Cross-validation\r\n\r\nI used 10 fold cross-validation for every test I did, which means that I used 90% of the available data for the learning algorithms and then the remaining 10% to test how they performed. I repeated this ten times until all data had been used for testing once.\r\n\r\n## 5.2. Results with all features\r\n\r\nWhen I used all of the features and three nearest neighbours I got amazing 100% accuracy, which was kind of suspect because that normally means that you most probably did something wrong.\r\n\r\n## 5.3. Results with a reduced feature set\r\n\r\nTherefore I tried to reduce the features to check if it would perform worse.\r\n\r\n1. When I removed the information about the amount of black pixels basically nothing happened.\r\n2. When I removed the information about the amount of lines and average length at least I got a couple of wrong categorized images, the accuracy went down to 95%.\r\n3. When I removed the information about the average angle of the lines, that was when I got significant errors. The accuracy dropped down to about 60%, which is still better then pure chance.\r\n\r\nSo it seems like the best feature to detect cat and dog face drawings done by me was the average angle of the straight lines in the image.\r\n\r\n# 6. Future study\r\n\r\nThe most important next step would be to gather many more drawings done by other people who use other styles to draw cat and dog faces.\r\n\r\nThen it would be interesting to use other learning algorithms like Bayes, Perceptron, etc.\r\n\r\nAnd then it would be interesting to use this approach on photos of real cats and dogs.\r\n\r\n# 7. 
## 3.2. Line features

I extracted these features from the lines:

- number of lines
- average length of the lines
- average angle of the lines

## 3.3. Other features

I also extracted the number of black pixels in the image, to have one possible feature which does not depend on the extracted lines.
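Computed from the segments found in the sketch above, the four features look roughly like this; it is a compact version of what the full script in section 7 does, assuming at least one segment was found:

```python
import math
import numpy

segments = lines.reshape(-1, 4)  # from the Hough sketch above

# Line features: count, mean length, mean angle in degrees.
lengths = [math.hypot(x1 - x2, y1 - y2) for x1, y1, x2, y2 in segments]
angles = [math.degrees(math.atan2(y1 - y2, x1 - x2)) for x1, y1, x2, y2 in segments]
lines_count = len(segments)
mid_length = sum(lengths) / lines_count
mid_angle = sum(angles) / lines_count

# The drawings are black on white, so black pixels are the zero-valued ones.
black_pixels = img.size - numpy.count_nonzero(img)

feature_vector = [lines_count, mid_length, mid_angle, black_pixels]
```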
# 4. _k_-nearest neighbor algorithm

For the classification I chose the _k_-nearest neighbors algorithm, which only looks at the k nearest neighbors of the document to be classified, where k is chosen by the user. It assumes that the document belongs to the same category as the majority of those neighbors.

In the following figure you can see that if the user chooses k = 3, as shown by the solid line, the algorithm will conclude that the document in the center (the green smiley) is of type triangle, because most of its three neighbors are triangles. If on the other hand the user chooses k = 7, as shown by the dotted line, then the number of neighbors which are rectangles is greater than the number of neighbors which are triangles, so it concludes that the smiley is of type rectangle.

![k-Nearest Neighbours as a graphic](https://jeena.net/images/2013/catdog/k-nearest-neighbours.png)

The picture above shows how this looks with two dimensions. I used four features, so the algorithm had to check the distance to the neighbours in four dimensions. This is not really more difficult, it is just more to calculate.
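As a minimal sketch of that voting step, assuming four-dimensional feature vectors like the ones from section 7 (the training values below are made-up placeholders):

```python
from collections import Counter
from scipy.spatial import KDTree

# Made-up training data: [lines_count, mid_length, mid_angle, black_pixels]
# feature vectors and their labels.
features = [[12, 30.0, -40.0, 5000], [30, 22.0, 10.0, 4200], [11, 28.0, -35.0, 5100]]
labels = ["cat", "dog", "cat"]
tree = KDTree(features)

def classify(feature, k=3):
    # Query the k nearest neighbours (Euclidean distance, here in four
    # dimensions) and let them vote on the category.
    _, indices = tree.query(feature, k=k)
    votes = Counter(labels[i] for i in indices)
    return votes.most_common(1)[0][0]

print(classify([13, 29.0, -38.0, 4900]))  # majority of the 3 neighbours wins
```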
# 5. Results

The results were quite encouraging; I assume that is because I used only one style to draw the dogs and one style to draw the cats.

## 5.1. k-fold cross-validation

I used 10-fold cross-validation for every test I did, which means that I used 90% of the available data for the learning algorithm and the remaining 10% to test how it performed. I repeated this ten times, until all data had been used for testing once.
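The splitting scheme itself is simple enough to sketch; `evaluate` below is a hypothetical helper which stands in for training on `train` and measuring accuracy on `test`, it is not part of the original code:

```python
def cross_validate(data, folds=10):
    # Each of the `folds` slices is the test set exactly once,
    # while the remaining slices form the training set.
    fold_size = len(data) // folds
    accuracies = []
    for f in range(folds):
        test = data[f * fold_size:(f + 1) * fold_size]
        train = data[:f * fold_size] + data[(f + 1) * fold_size:]
        accuracies.append(evaluate(train, test))  # hypothetical helper
    return sum(accuracies) / folds
```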
## 5.2. Results with all features

When I used all of the features and the three nearest neighbours I got an amazing 100% accuracy, which was kind of suspicious, because that normally means you most probably did something wrong.

## 5.3. Results with a reduced feature set

Therefore I tried to reduce the feature set to check whether it would perform worse.

1. When I removed the information about the number of black pixels, basically nothing happened.
2. When I removed the information about the number of lines and their average length, I at least got a couple of wrongly categorized images; the accuracy went down to 95%.
3. When I removed the information about the average angle of the lines, I got significant errors. The accuracy dropped to about 60%, which is still better than pure chance.

So it seems that the best feature for detecting cat and dog face drawings done by me is the average angle of the straight lines in the image.

# 6. Future study

The most important next step would be to gather many more drawings done by other people who use other styles to draw cat and dog faces.

Then it would be interesting to try other learning algorithms like naive Bayes, the perceptron, etc.

And finally it would be interesting to apply this approach to photos of real cats and dogs.

# 7. Code

```python
#!/usr/bin/env python3

import math
import os

import cv2
import numpy
from scipy.spatial import KDTree


def extract_features(label):
    directory = os.path.join("img", label)
    features = []

    for fn in os.listdir(directory):
        img = cv2.imread(os.path.join(directory, fn), cv2.IMREAD_GRAYSCALE)

        # find edges
        canny = cv2.Canny(img, 50, 100)

        # count black pixels (the drawings are black on white)
        black_pixels = img.size - numpy.count_nonzero(img)

        # find line segments
        lines = cv2.HoughLinesP(canny, 1, math.pi / 360, 5, None, 10, 1)

        lengths = []
        angles = []
        if lines is not None:
            for x1, y1, x2, y2 in lines.reshape(-1, 4):
                # Pythagoras
                lengths.append(math.hypot(x1 - x2, y1 - y2))
                # atan2 also handles vertical lines (x1 == x2)
                angles.append(math.degrees(math.atan2(y1 - y2, x1 - x2)))

        lines_count = len(lengths)
        mid_length = sum(lengths) / lines_count if lines_count else 0
        mid_angle = sum(angles) / lines_count if lines_count else 0

        features.append([
            [lines_count, mid_length, mid_angle, black_pixels],
            label
        ])

    return features


if __name__ == "__main__":
    cats = extract_features("cat")
    dogs = extract_features("dog")

    test_count = 5

    test_data = dogs[:test_count] + cats[:test_count]
    test_labels = [a[1] for a in test_data]
    test_features = [a[0] for a in test_data]

    data = cats[test_count:] + dogs[test_count:]
    labels = [a[1] for a in data]
    features = [a[0] for a in data]

    tree = KDTree(features)

    for t in range(test_count * 2):
        distances, indices = tree.query(test_features[t], k=3)
        print("-")
        for i in indices:
            print(test_labels[t] + " is a " + labels[i])
```
diff --git a/stylesheets/pygment_trac.css b/stylesheets/pygment_trac.css deleted file mode 100644 index e65cedf..0000000 --- a/stylesheets/pygment_trac.css +++ /dev/null @@ -1,70 +0,0 @@ -.highlight .hll { background-color: #ffffcc } -.highlight { background: #f0f3f3; } -.highlight .c { color: #0099FF; font-style: italic } /* Comment */ -.highlight .err { color: #AA0000; background-color: #FFAAAA } /* Error */ -.highlight .k { color: #006699; font-weight: bold } /* Keyword */ -.highlight .o { color: #555555 } /* Operator */ -.highlight .cm { color: #0099FF; font-style: italic } /* Comment.Multiline */ -.highlight .cp { color: #009999 } /* Comment.Preproc */ -.highlight .c1 { color: #0099FF; font-style: italic } /* Comment.Single */ -.highlight .cs { color: #0099FF; font-weight: bold; font-style: italic } /* Comment.Special */ -.highlight .gd { background-color: #FFCCCC; border: 1px solid #CC0000 } /* Generic.Deleted */ -.highlight .ge { font-style: italic } /* Generic.Emph */ -.highlight .gr { color: #FF0000 } /* Generic.Error */ -.highlight .gh { color: #003300; font-weight: bold } /* Generic.Heading */ -.highlight .gi { background-color: #CCFFCC; border: 1px solid #00CC00 } /* Generic.Inserted */ -.highlight .go { color: #AAAAAA } /* Generic.Output */ -.highlight .gp { color: #000099; font-weight: bold } /* Generic.Prompt */ -.highlight .gs { font-weight: bold } /* Generic.Strong */ -.highlight .gu { color: #003300; font-weight: bold } /* Generic.Subheading */ -.highlight .gt { color: #99CC66 } /* Generic.Traceback */ -.highlight .kc { color: #006699; font-weight: bold } /* Keyword.Constant */ -.highlight .kd { color: #006699; font-weight: bold } /* Keyword.Declaration */ -.highlight .kn { color: #006699; font-weight: bold } /* Keyword.Namespace */ -.highlight .kp { color: #006699 } /* Keyword.Pseudo */ -.highlight .kr { color: #006699; font-weight: bold } /* Keyword.Reserved */ -.highlight .kt { color: #007788; font-weight: bold } /* Keyword.Type */ -.highlight .m { color: #FF6600 } /* Literal.Number */ -.highlight .s { color: #CC3300 } /* Literal.String */ -.highlight .na { color: #330099 } /* Name.Attribute */ -.highlight .nb { color: #336666 } /* Name.Builtin */ -.highlight .nc { color: #00AA88; font-weight: bold } /* Name.Class */ -.highlight .no { color: #336600 } /* Name.Constant */ -.highlight .nd { color: #9999FF } /* Name.Decorator */ -.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ -.highlight .ne { color: #CC0000; font-weight: bold } /* Name.Exception */ -.highlight .nf { color: #CC00FF } /* Name.Function */ -.highlight .nl { color: #9999FF } /* Name.Label */ -.highlight .nn { color: #00CCFF; font-weight: bold } /* Name.Namespace */ -.highlight .nt { color: #330099; font-weight: bold } /* Name.Tag */ -.highlight .nv { color: #003333 } /* Name.Variable */ -.highlight .ow { color: #000000; font-weight: bold } /* Operator.Word */ -.highlight .w { color: #bbbbbb } /* Text.Whitespace */ -.highlight .mf { color: #FF6600 } /* Literal.Number.Float */ -.highlight .mh { color: #FF6600 } /* Literal.Number.Hex */ -.highlight .mi { color: #FF6600 } /* Literal.Number.Integer */ -.highlight .mo { color: #FF6600 } /* Literal.Number.Oct */ -.highlight .sb { color: #CC3300 } /* Literal.String.Backtick */ -.highlight .sc { color: #CC3300 } /* Literal.String.Char */ -.highlight .sd { color: #CC3300; font-style: italic } /* Literal.String.Doc */ -.highlight .s2 { color: #CC3300 } /* Literal.String.Double */ -.highlight .se { color: #CC3300; font-weight: bold } /* Literal.String.Escape */ -.highlight .sh { color: #CC3300 } /* Literal.String.Heredoc */ -.highlight .si { color: #AA0000 } /* Literal.String.Interpol */ -.highlight .sx { color: #CC3300 } /* Literal.String.Other */ -.highlight .sr { color: #33AAAA } /* Literal.String.Regex */ -.highlight .s1 { color: #CC3300 } /* Literal.String.Single */ -.highlight .ss { color: #FFCC33 } /* Literal.String.Symbol */ -.highlight .bp { color: #336666 } /* Name.Builtin.Pseudo */ -.highlight .vc { color: #003333 } /* Name.Variable.Class */ -.highlight .vg { color: #003333 } /* Name.Variable.Global */ -.highlight .vi { color: #003333 } /* Name.Variable.Instance */ -.highlight .il { color: #FF6600 } /* Literal.Number.Integer.Long */ - -.type-csharp .highlight .k { color: #0000FF } -.type-csharp .highlight .kt { color: #0000FF } -.type-csharp .highlight .nf { color: #000000; font-weight: normal } -.type-csharp .highlight .nc { color: #2B91AF } -.type-csharp .highlight .nn { color: #000000 } -.type-csharp .highlight .s { color: #A31515 } -.type-csharp .highlight .sc { color: #A31515 } diff --git a/stylesheets/stylesheet.css b/stylesheets/stylesheet.css deleted file mode 100644 index b48c8a8..0000000 --- a/stylesheets/stylesheet.css +++ /dev/null @@ -1,427 +0,0 @@ -/******************************************************************************* -Slate Theme for GitHub Pages -by Jason Costello, @jsncostello -*******************************************************************************/ - -@import url(pygment_trac.css); - -/******************************************************************************* -MeyerWeb Reset -*******************************************************************************/ - -html, body, div, span, applet, object, iframe, -h1, h2, h3, h4, h5, h6, p, blockquote, pre, -a, abbr, acronym, address, big, cite, code, -del, dfn, em, img, ins, kbd, q, s, samp, -small, strike, strong, sub, sup, tt, var, -b, u, i, center, -dl, dt, dd, ol, ul, li, -fieldset, form, label, legend, -table, caption, tbody, tfoot, thead, tr, th, td, -article, aside, canvas, details, embed, -figure, figcaption, footer, header, hgroup, -menu, nav, output, ruby, section, summary, -time, mark, audio, video { - margin: 0; - padding: 0; - border: 0; - font: inherit; - vertical-align: baseline; -} - -/* HTML5 display-role reset for older browsers */ -article, aside, details, figcaption, figure, -footer, header, hgroup, menu, nav, section { - display: block; -} - -ol, ul { - list-style: none; -} - -blockquote, q { -} - -table { - border-collapse: collapse; - border-spacing: 0; -} - -/******************************************************************************* -Theme Styles -*******************************************************************************/ - -body { - box-sizing: border-box; - color:#373737; - background: #212121; - font-size: 16px; - font-family: 'Myriad Pro', Calibri, Helvetica, Arial, sans-serif; - line-height: 1.5; - -webkit-font-smoothing: antialiased; -} - -h1, h2, h3, h4, h5, h6 { - margin: 10px 0; - font-weight: 700; - color:#222222; - font-family: 'Lucida Grande', 'Calibri', Helvetica, Arial, sans-serif; - letter-spacing: -1px; -} - -h1 { - font-size: 36px; - font-weight: 700; -} - -h2 { - padding-bottom: 10px; - font-size: 32px; - background: url('../images/bg_hr.png') repeat-x bottom; -} - -h3 { - font-size: 24px; -} - -h4 { - font-size: 21px; -} - -h5 { - font-size: 18px; -} - -h6 { - font-size: 16px; -} - -p { - margin: 10px 0 15px 0; -} - -footer p { - color: #f2f2f2; -} - -a { - text-decoration: none; - color: #007edf; - text-shadow: none; - - transition: color 0.5s ease; - transition: text-shadow 0.5s ease; - -webkit-transition: color 0.5s ease; - -webkit-transition: text-shadow 0.5s ease; - -moz-transition: color 0.5s ease; - -moz-transition: text-shadow 0.5s ease; - -o-transition: color 0.5s ease; - -o-transition: text-shadow 0.5s ease; - -ms-transition: color 0.5s ease; - -ms-transition: text-shadow 0.5s ease; -} - -#main_content a:hover { - color: #0069ba; - text-shadow: #0090ff 0px 0px 2px; -} - -footer a:hover { - color: #43adff; - text-shadow: #0090ff 0px 0px 2px; -} - -em { - font-style: italic; -} - -strong { - font-weight: bold; -} - -img { - position: relative; - margin: 0 auto; - max-width: 739px; - padding: 5px; - margin: 10px 0 10px 0; - border: 1px solid #ebebeb; - - box-shadow: 0 0 5px #ebebeb; - -webkit-box-shadow: 0 0 5px #ebebeb; - -moz-box-shadow: 0 0 5px #ebebeb; - -o-box-shadow: 0 0 5px #ebebeb; - -ms-box-shadow: 0 0 5px #ebebeb; -} - -pre, code { - width: 100%; - color: #222; - background-color: #fff; - - font-family: Monaco, "Bitstream Vera Sans Mono", "Lucida Console", Terminal, monospace; - font-size: 14px; - - border-radius: 2px; - -moz-border-radius: 2px; - -webkit-border-radius: 2px; - - - -} - -pre { - width: 100%; - padding: 10px; - box-shadow: 0 0 10px rgba(0,0,0,.1); - overflow: auto; -} - -code { - padding: 3px; - margin: 0 3px; - box-shadow: 0 0 10px rgba(0,0,0,.1); -} - -pre code { - display: block; - box-shadow: none; -} - -blockquote { - color: #666; - margin-bottom: 20px; - padding: 0 0 0 20px; - border-left: 3px solid #bbb; -} - -ul, ol, dl { - margin-bottom: 15px -} - -ul li { - list-style: inside; - padding-left: 20px; -} - -ol li { - list-style: decimal inside; - padding-left: 20px; -} - -dl dt { - font-weight: bold; -} - -dl dd { - padding-left: 20px; - font-style: italic; -} - -dl p { - padding-left: 20px; - font-style: italic; -} - -hr { - height: 1px; - margin-bottom: 5px; - border: none; - background: url('../images/bg_hr.png') repeat-x center; -} - -table { - border: 1px solid #373737; - margin-bottom: 20px; - text-align: left; - } - -th { - font-family: 'Lucida Grande', 'Helvetica Neue', Helvetica, Arial, sans-serif; - padding: 10px; - background: #373737; - color: #fff; - } - -td { - padding: 10px; - border: 1px solid #373737; - } - -form { - background: #f2f2f2; - padding: 20px; -} - -img { - width: 100%; - max-width: 100%; -} - -/******************************************************************************* -Full-Width Styles -*******************************************************************************/ - -.outer { - width: 100%; -} - -.inner { - position: relative; - max-width: 640px; - padding: 20px 10px; - margin: 0 auto; -} - -#forkme_banner { - display: block; - position: absolute; - top:0; - right: 10px; - z-index: 10; - padding: 10px 50px 10px 10px; - color: #fff; - background: url('../images/blacktocat.png') #0090ff no-repeat 95% 50%; - font-weight: 700; - box-shadow: 0 0 10px rgba(0,0,0,.5); - border-bottom-left-radius: 2px; - border-bottom-right-radius: 2px; -} - -#header_wrap { - background: #212121; - background: -moz-linear-gradient(top, #373737, #212121); - background: -webkit-linear-gradient(top, #373737, #212121); - background: -ms-linear-gradient(top, #373737, #212121); - background: -o-linear-gradient(top, #373737, #212121); - background: linear-gradient(top, #373737, #212121); -} - -#header_wrap .inner { - padding: 50px 10px 
30px 10px; -} - -#project_title { - margin: 0; - color: #fff; - font-size: 42px; - font-weight: 700; - text-shadow: #111 0px 0px 10px; -} - -#project_tagline { - color: #fff; - font-size: 24px; - font-weight: 300; - background: none; - text-shadow: #111 0px 0px 10px; -} - -#downloads { - position: absolute; - width: 210px; - z-index: 10; - bottom: -40px; - right: 0; - height: 70px; - background: url('../images/icon_download.png') no-repeat 0% 90%; -} - -.zip_download_link { - display: block; - float: right; - width: 90px; - height:70px; - text-indent: -5000px; - overflow: hidden; - background: url(../images/sprite_download.png) no-repeat bottom left; -} - -.tar_download_link { - display: block; - float: right; - width: 90px; - height:70px; - text-indent: -5000px; - overflow: hidden; - background: url(../images/sprite_download.png) no-repeat bottom right; - margin-left: 10px; -} - -.zip_download_link:hover { - background: url(../images/sprite_download.png) no-repeat top left; -} - -.tar_download_link:hover { - background: url(../images/sprite_download.png) no-repeat top right; -} - -#main_content_wrap { - background: #f2f2f2; - border-top: 1px solid #111; - border-bottom: 1px solid #111; -} - -#main_content { - padding-top: 40px; -} - -#footer_wrap { - background: #212121; -} - - - -/******************************************************************************* -Small Device Styles -*******************************************************************************/ - -@media screen and (max-width: 480px) { - body { - font-size:14px; - } - - #downloads { - display: none; - } - - .inner { - min-width: 320px; - max-width: 480px; - } - - #project_title { - font-size: 32px; - } - - h1 { - font-size: 28px; - } - - h2 { - font-size: 24px; - } - - h3 { - font-size: 21px; - } - - h4 { - font-size: 18px; - } - - h5 { - font-size: 14px; - } - - h6 { - font-size: 12px; - } - - code, pre { - min-width: 320px; - max-width: 480px; - font-size: 11px; - } - -}
diff --git a/tf/README.md b/tf/README.md
new file mode 100644
index 0000000..97e05f8
--- /dev/null
+++ b/tf/README.md
@@ -0,0 +1,3 @@
+Categorization of cat and dog faces with TensorFlow
+===================================================
+
diff --git a/tf/categorize.py b/tf/categorize.py
new file mode 100755
index 0000000..7bb3e96
--- /dev/null
+++ b/tf/categorize.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+
+import tensorflow as tf
+import pathlib
+import random
+import matplotlib.pyplot as plt
+
+tf.enable_eager_execution()
+
+
+data_root = pathlib.Path('../img/')
+
+all_image_paths = list(data_root.glob('*/*'))
+all_image_paths = [str(path) for path in all_image_paths]
+random.shuffle(all_image_paths)
+image_count = len(all_image_paths)
+label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
+label_to_index = dict((name, index) for index, name in enumerate(label_names))
+all_image_labels = [label_to_index[pathlib.Path(path).parent.name]
+                    for path in all_image_paths]
+
+def preprocess_image(image):
+    image = tf.image.decode_image(image, channels=3)
+    image = tf.image.resize_images(image, [192, 192])
+    image /= 255.0  # normalize to [0,1] range
+    return image
+
+def load_and_preprocess_image(path):
+    image = tf.read_file(path)
+    return preprocess_image(image)
+
+image_path = all_image_paths[0]
+label = all_image_labels[0]
+
+image = load_and_preprocess_image(image_path)
+plt.imshow(image)
+plt.grid(False)
+plt.title(label_names[label].title())
+plt.show()