diff --git a/Makefile b/Makefile
index 8898ba85..41f0e049 100644
--- a/Makefile
+++ b/Makefile
@@ -74,7 +74,7 @@ CFLAGS+= -DCUDNN -I/usr/local/cudnn/include
 LDFLAGS+= -L/usr/local/cudnn/lib64 -lcudnn
 endif
 
-OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o tree.o
+OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o
 ifeq ($(GPU), 1) 
 LDFLAGS+= -lstdc++ 
 OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj
index a6c2b518..300dd453 100644
--- a/build/darknet/darknet.vcxproj
+++ b/build/darknet/darknet.vcxproj
@@ -216,6 +216,7 @@
     <ClCompile Include="..\..\src\parser.c" />
     <ClCompile Include="..\..\src\region_layer.c" />
     <ClCompile Include="..\..\src\reorg_layer.c" />
+    <ClCompile Include="..\..\src\reorg_old_layer.c" />
     <ClCompile Include="..\..\src\rnn.c" />
     <ClCompile Include="..\..\src\rnn_layer.c" />
     <ClCompile Include="..\..\src\rnn_vid.c" />
@@ -269,6 +270,7 @@
     <ClInclude Include="..\..\src\parser.h" />
     <ClInclude Include="..\..\src\region_layer.h" />
     <ClInclude Include="..\..\src\reorg_layer.h" />
+    <ClInclude Include="..\..\src\reorg_old_layer.h" />
     <ClInclude Include="..\..\src\rnn_layer.h" />
     <ClInclude Include="..\..\src\route_layer.h" />
     <ClInclude Include="..\..\src\shortcut_layer.h" />
diff --git a/build/darknet/darknet_no_gpu.vcxproj b/build/darknet/darknet_no_gpu.vcxproj
index 578a5af3..e1c4c37f 100644
--- a/build/darknet/darknet_no_gpu.vcxproj
+++ b/build/darknet/darknet_no_gpu.vcxproj
@@ -213,6 +213,7 @@
     <ClCompile Include="..\..\src\parser.c" />
     <ClCompile Include="..\..\src\region_layer.c" />
     <ClCompile Include="..\..\src\reorg_layer.c" />
+    <ClCompile Include="..\..\src\reorg_old_layer.c" />
     <ClCompile Include="..\..\src\rnn.c" />
     <ClCompile Include="..\..\src\rnn_layer.c" />
     <ClCompile Include="..\..\src\rnn_vid.c" />
@@ -266,6 +267,7 @@
     <ClInclude Include="..\..\src\parser.h" />
     <ClInclude Include="..\..\src\region_layer.h" />
     <ClInclude Include="..\..\src\reorg_layer.h" />
+    <ClInclude Include="..\..\src\reorg_old_layer.h" />
     <ClInclude Include="..\..\src\rnn_layer.h" />
     <ClInclude Include="..\..\src\route_layer.h" />
     <ClInclude Include="..\..\src\shortcut_layer.h" />
diff --git a/build/darknet/x64/darknet_voc.cmd b/build/darknet/x64/darknet_voc.cmd
index 864bb641..1ee177a6 100644
--- a/build/darknet/x64/darknet_voc.cmd
+++ b/build/darknet/x64/darknet_voc.cmd
@@ -1,8 +1,8 @@
 
-darknet.exe detector test data/voc.data yolo-voc.cfg yolo-voc.weights 009460.jpg
+rem darknet.exe detector test data/voc.data yolo-voc.cfg yolo-voc.weights 009460.jpg
 
 
-darknet.exe detector test data/voc.data yolo-voc.cfg yolo-voc.weights -i 0 -thresh 0.1 dogr.jpg
+darknet.exe detector test data/voc.data yolo-voc.cfg yolo-voc.weights -i 0 -thresh 0.2 dogr.jpg
 
 
 pause
\ No newline at end of file
diff --git a/build/darknet/x64/dogr.jpg b/build/darknet/x64/dogr.jpg
new file mode 100644
index 00000000..e9201231
Binary files /dev/null and b/build/darknet/x64/dogr.jpg differ
diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj
index 31699fba..b68c5b4c 100644
--- a/build/darknet/yolo_cpp_dll.vcxproj
+++ b/build/darknet/yolo_cpp_dll.vcxproj
@@ -218,6 +218,7 @@
     <ClCompile Include="..\..\src\parser.c" />
     <ClCompile Include="..\..\src\region_layer.c" />
     <ClCompile Include="..\..\src\reorg_layer.c" />
+    <ClCompile Include="..\..\src\reorg_old_layer.c" />
     <ClCompile Include="..\..\src\rnn.c" />
     <ClCompile Include="..\..\src\rnn_layer.c" />
     <ClCompile Include="..\..\src\rnn_vid.c" />
@@ -273,6 +274,7 @@
     <ClInclude Include="..\..\src\parser.h" />
     <ClInclude Include="..\..\src\region_layer.h" />
     <ClInclude Include="..\..\src\reorg_layer.h" />
+    <ClInclude Include="..\..\src\reorg_old_layer.h" />
     <ClInclude Include="..\..\src\rnn_layer.h" />
     <ClInclude Include="..\..\src\route_layer.h" />
     <ClInclude Include="..\..\src\shortcut_layer.h" />
diff --git a/build/darknet/yolo_cpp_dll_no_gpu.vcxproj b/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
index 03fbd3aa..c1d1d30e 100644
--- a/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
+++ b/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
@@ -202,6 +202,7 @@
     <ClCompile Include="..\..\src\parser.c" />
     <ClCompile Include="..\..\src\region_layer.c" />
     <ClCompile Include="..\..\src\reorg_layer.c" />
+    <ClCompile Include="..\..\src\reorg_old_layer.c" />
     <ClCompile Include="..\..\src\rnn.c" />
     <ClCompile Include="..\..\src\rnn_layer.c" />
     <ClCompile Include="..\..\src\rnn_vid.c" />
@@ -256,6 +257,7 @@
     <ClInclude Include="..\..\src\parser.h" />
     <ClInclude Include="..\..\src\region_layer.h" />
     <ClInclude Include="..\..\src\reorg_layer.h" />
+    <ClInclude Include="..\..\src\reorg_old_layer.h" />
     <ClInclude Include="..\..\src\rnn_layer.h" />
     <ClInclude Include="..\..\src\route_layer.h" />
     <ClInclude Include="..\..\src\shortcut_layer.h" />
diff --git a/src/blas.c b/src/blas.c
index cb6501fd..6d565e96 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -6,22 +6,25 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
+void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride, int forward, float *out)
 {
     int b,i,j,k;
-    int out_c = c/(stride*stride);
+    int in_c = out_c/(stride*stride);
+
+	//printf("\n out_c = %d, out_w = %d, out_h = %d, stride = %d, forward = %d \n", out_c, out_w, out_h, stride, forward);
+	//printf("  in_c = %d,  in_w = %d,  in_h = %d \n", in_c, out_w*stride, out_h*stride);
 
     for(b = 0; b < batch; ++b){
-        for(k = 0; k < c; ++k){
-            for(j = 0; j < h; ++j){
-                for(i = 0; i < w; ++i){
-                    int in_index  = i + w*(j + h*(k + c*b));
-                    int c2 = k % out_c;
-                    int offset = k / out_c;
+        for(k = 0; k < out_c; ++k){
+            for(j = 0; j < out_h; ++j){
+                for(i = 0; i < out_w; ++i){
+                    int in_index  = i + out_w*(j + out_h*(k + out_c*b));
+                    int c2 = k % in_c;
+                    int offset = k / in_c;
                     int w2 = i*stride + offset % stride;
                     int h2 = j*stride + offset / stride;
-                    int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
-                    if(forward) out[out_index] = x[in_index];
+                    int out_index = w2 + out_w*stride*(h2 + out_h*stride*(c2 + in_c*b));
+                    if(forward) out[out_index] = x[in_index];	// taken when forward != 0; a non-reverse forward pass calls with forward = 0 (else branch)
                     else out[in_index] = x[out_index];
                 }
             }
diff --git a/src/layer.h b/src/layer.h
index 93aca6c7..9bf60570 100644
--- a/src/layer.h
+++ b/src/layer.h
@@ -34,6 +34,7 @@ typedef enum {
     XNOR,
     REGION,
     REORG,
+	REORG_OLD,
     BLANK
 } LAYER_TYPE;
 
diff --git a/src/parser.c b/src/parser.c
index 78036322..1adcd166 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -24,6 +24,7 @@
 #include "parser.h"
 #include "region_layer.h"
 #include "reorg_layer.h"
+#include "reorg_old_layer.h"
 #include "rnn_layer.h"
 #include "route_layer.h"
 #include "shortcut_layer.h"
@@ -60,6 +61,7 @@ LAYER_TYPE string_to_layer_type(char * type)
     if (strcmp(type, "[max]")==0
             || strcmp(type, "[maxpool]")==0) return MAXPOOL;
     if (strcmp(type, "[reorg]")==0) return REORG;
+	if (strcmp(type, "[reorg_old]") == 0) return REORG_OLD;
     if (strcmp(type, "[avg]")==0
             || strcmp(type, "[avgpool]")==0) return AVGPOOL;
     if (strcmp(type, "[dropout]")==0) return DROPOUT;
@@ -358,6 +360,23 @@ layer parse_reorg(list *options, size_params params)
     return layer;
 }
 
+/* Parses a [reorg_old] section: reads "stride" (default 1) and "reverse" (default 0). */
+layer parse_reorg_old(list *options, size_params params)
+{
+    printf("\n reorg_old \n");
+    int stride = option_find_int(options, "stride", 1);
+    int reverse = option_find_int_quiet(options, "reverse", 0);
+    int h = params.h;
+    int w = params.w;
+    int c = params.c;
+
+    if (!(h && w && c)) error("Layer before reorg_old layer must output image.");
+    /* stride is used as a divisor when the layer computes its shape */
+    if (stride < 1) error("Stride of reorg_old layer must be positive.");
+
+    return make_reorg_old_layer(params.batch, w, h, c, stride, reverse);
+}
+
 maxpool_layer parse_maxpool(list *options, size_params params)
 {
     int stride = option_find_int(options, "stride",1);
@@ -657,7 +676,9 @@ network parse_network_cfg_custom(char *filename, int batch)
         }else if(lt == MAXPOOL){
             l = parse_maxpool(options, params);
         }else if(lt == REORG){
-            l = parse_reorg(options, params);
+            l = parse_reorg(options, params);		}
+		else if (lt == REORG_OLD) {
+			l = parse_reorg_old(options, params);
         }else if(lt == AVGPOOL){
             l = parse_avgpool(options, params);
         }else if(lt == ROUTE){
diff --git a/src/reorg_layer.c b/src/reorg_layer.c
index 2abca8fa..9c8ea829 100644
--- a/src/reorg_layer.c
+++ b/src/reorg_layer.c
@@ -77,38 +77,40 @@ void resize_reorg_layer(layer *l, int w, int h)
 
 void forward_reorg_layer(const layer l, network_state state)
 {
+    /* reorg_cpu's dimension parameters are named out_w/out_h/out_c, so pass the output shape. */
     if(l.reverse){
-        reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output);
+        reorg_cpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, l.output);
     }else {
-        reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output);
+        reorg_cpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, l.output);
     }
 }
 
 void backward_reorg_layer(const layer l, network_state state)
 {
+    /* Same shape arguments as the forward pass, with the direction flag inverted. */
     if(l.reverse){
-        reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta);
+        reorg_cpu(l.delta, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, state.delta);
     }else{
-        reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta);
+        reorg_cpu(l.delta, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, state.delta);
     }
 }
 
 #ifdef GPU
 void forward_reorg_layer_gpu(layer l, network_state state)
 {
     if(l.reverse){
-        reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu);
+        reorg_ongpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, l.output_gpu);
     }else {
-        reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu);
+        reorg_ongpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, l.output_gpu);
     }
 }
 
 void backward_reorg_layer_gpu(layer l, network_state state)
 {
     if(l.reverse){
-        reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta);
+        reorg_ongpu(l.delta_gpu, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, state.delta);
     }else{
-        reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta);
+        reorg_ongpu(l.delta_gpu, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, state.delta);
     }
 }
 #endif
diff --git a/src/reorg_old_layer.c b/src/reorg_old_layer.c
new file mode 100644
index 00000000..c55cf7c2
--- /dev/null
+++ b/src/reorg_old_layer.c
@@ -0,0 +1,118 @@
+#include "reorg_old_layer.h"
+#include "cuda.h"
+#include "blas.h"
+#include <stdio.h>
+
+
+/*
+ * Builds a REORG_OLD layer for a w x h x c input with the given batch size.
+ * With reverse set the output is (w*stride) x (h*stride) x c/(stride*stride);
+ * otherwise it is (w/stride) x (h/stride) x c*(stride*stride).
+ * Allocates zeroed output and delta buffers of outputs*batch floats.
+ */
+layer make_reorg_old_layer(int batch, int w, int h, int c, int stride, int reverse)
+{
+    const int sq = stride*stride;
+    layer l = {0};
+    l.type = REORG_OLD;
+    l.batch = batch;
+    l.stride = stride;
+    l.reverse = reverse;
+    l.w = w;
+    l.h = h;
+    l.c = c;
+    l.inputs = h*w*c;
+    l.out_w = reverse ? w*stride : w/stride;
+    l.out_h = reverse ? h*stride : h/stride;
+    l.out_c = reverse ? c/sq : c*sq;
+    l.outputs = l.out_h * l.out_w * l.out_c;
+    fprintf(stderr, "reorg_old              /%2d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n",  stride, w, h, c, l.out_w, l.out_h, l.out_c);
+
+    const int total = l.outputs * batch;
+    l.output = calloc(total, sizeof(float));
+    l.delta = calloc(total, sizeof(float));
+    l.forward = forward_reorg_old_layer;
+    l.backward = backward_reorg_old_layer;
+#ifdef GPU
+    l.forward_gpu = forward_reorg_old_layer_gpu;
+    l.backward_gpu = backward_reorg_old_layer_gpu;
+    l.output_gpu = cuda_make_array(l.output, total);
+    l.delta_gpu = cuda_make_array(l.delta, total);
+#endif
+    return l;
+}
+
+/*
+ * Re-shapes an existing REORG_OLD layer for a new w x h input; the channel
+ * count, stride, batch and reverse flag already stored in *l are reused.
+ * Recomputes the output shape and reallocates the output/delta buffers
+ * (and, under GPU, frees and recreates the device arrays).
+ */
+void resize_reorg_old_layer(layer *l, int w, int h)
+{
+    const int stride = l->stride;
+    const int c = l->c;
+
+    l->h = h;
+    l->w = w;
+    l->out_w = l->reverse ? w*stride : w/stride;
+    l->out_h = l->reverse ? h*stride : h/stride;
+    l->out_c = l->reverse ? c/(stride*stride) : c*(stride*stride);
+
+    l->outputs = l->out_h * l->out_w * l->out_c;
+    /* Input element count, computed the same way make_reorg_old_layer does. */
+    l->inputs = h*w*c;
+    int output_size = l->outputs * l->batch;
+
+    l->output = realloc(l->output, output_size * sizeof(float));
+    l->delta = realloc(l->delta, output_size * sizeof(float));
+
+#ifdef GPU
+    cuda_free(l->output_gpu);
+    cuda_free(l->delta_gpu);
+    l->output_gpu  = cuda_make_array(l->output, output_size);
+    l->delta_gpu   = cuda_make_array(l->delta,  output_size);
+#endif
+}
+
+/*
+ * CPU forward pass: runs reorg_cpu from state.input into l.output using the
+ * layer's input shape (l.w, l.h, l.c); the flag is 1 when l.reverse is set, else 0.
+ */
+void forward_reorg_old_layer(const layer l, network_state state)
+{
+    const int flag = l.reverse ? 1 : 0;
+    reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, flag, l.output);
+}
+
+/*
+ * CPU backward pass: runs reorg_cpu from l.delta into state.delta with the
+ * opposite flag to the forward pass (0 when l.reverse is set, otherwise 1).
+ */
+void backward_reorg_old_layer(const layer l, network_state state)
+{
+    const int flag = l.reverse ? 0 : 1;
+    reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, flag, state.delta);
+}
+
+#ifdef GPU
+/*
+ * GPU forward pass: calls reorg_ongpu from state.input into l.output_gpu with
+ * (l.w, l.h, l.c); the flag is 1 when l.reverse is set, else 0.
+ */
+void forward_reorg_old_layer_gpu(layer l, network_state state)
+{
+    const int flag = l.reverse ? 1 : 0;
+    reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, flag, l.output_gpu);
+}
+
+/*
+ * GPU backward pass: calls reorg_ongpu from l.delta_gpu into state.delta with
+ * the opposite flag to the forward pass (0 when l.reverse is set, otherwise 1).
+ */
+void backward_reorg_old_layer_gpu(layer l, network_state state)
+{
+    const int flag = l.reverse ? 0 : 1;
+    reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, flag, state.delta);
+}
+#endif
diff --git a/src/reorg_old_layer.h b/src/reorg_old_layer.h
new file mode 100644
index 00000000..e1838202
--- /dev/null
+++ b/src/reorg_old_layer.h
@@ -0,0 +1,20 @@
+#ifndef REORG_OLD_LAYER_H
+#define REORG_OLD_LAYER_H
+
+#include "image.h"
+#include "cuda.h"
+#include "layer.h"
+#include "network.h"
+
+layer make_reorg_old_layer(int batch, int w, int h, int c, int stride, int reverse); /* w precedes h, as in the definition */
+void resize_reorg_old_layer(layer *l, int w, int h);
+void forward_reorg_old_layer(const layer l, network_state state);
+void backward_reorg_old_layer(const layer l, network_state state);
+
+#ifdef GPU
+void forward_reorg_old_layer_gpu(layer l, network_state state);
+void backward_reorg_old_layer_gpu(layer l, network_state state);
+#endif
+
+#endif
+