mirror of https://github.com/pjreddie/darknet.git (synced 2023-08-10 21:13:14 +03:00)
OK SHOULD I START WORKING ON CVPR OR WHAT?
src/activation_kernels.cu
@@ -140,6 +140,41 @@ __device__ float gradient_kernel(float x, ACTIVATION a)
     return 0;
 }
 
+__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx)
+{
+    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    int i = id % s;
+    int b = id / s;
+    float x1 = x[b*s + i];
+    float x2 = x[b*s + s/2 + i];
+    if(id < n) {
+        float de = dy[id];
+        dx[b*s + i] = x2*de;
+        dx[b*s + s/2 + i] = x1*de;
+    }
+}
+
+extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y)
+{
+    binary_gradient_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, dx, n/2, size, a, y);
+    check_error(cudaPeekAtLastError());
+}
+__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y)
+{
+    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    int i = id % s;
+    int b = id / s;
+    float x1 = x[b*s + i];
+    float x2 = x[b*s + s/2 + i];
+    if(id < n) y[id] = x1*x2;
+}
+
+extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y)
+{
+    binary_activate_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, n/2, size, a, y);
+    check_error(cudaPeekAtLastError());
+}
+
 __global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
 {
     int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
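Note: taken together, these kernels implement a multiplicative pairing activation: the forward kernel multiplies each input by a partner offset s/2 within its block (the BINARY_ACTIVATION argument is accepted but unused here), and the gradient kernel applies the product rule over the same pairing, which is why both wrappers launch only n/2 threads. A CPU sketch of the forward computation, for illustration only (not part of the commit):

/* CPU sketch of binary_activate_array_kernel: each of the n outputs
 * multiplies x[b*s + i] by its partner at offset s/2; the gradient
 * kernel above is the product rule over the same pairing:
 *   dx1 = x2*dy, dx2 = x1*dy. */
void binary_activate_array_cpu(float *x, int n, int s, float *y)
{
    int id;
    for(id = 0; id < n; ++id){   /* n = half the total element count */
        int i = id % s;
        int b = id / s;
        y[id] = x[b*s + i] * x[b*s + s/2 + i];
    }
}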
src/data.c
@@ -1172,6 +1172,56 @@ data load_data_regression(char **paths, int n, int m, int min, int max, int size
     return d;
 }
 
+data select_data(data *orig, int *inds)
+{
+    data d = {0};
+    d.shallow = 1;
+    d.w = orig[0].w;
+    d.h = orig[0].h;
+
+    d.X.rows = orig[0].X.rows;
+    d.y.rows = orig[0].X.rows;
+
+    d.X.cols = orig[0].X.cols;
+    d.y.cols = orig[0].y.cols;
+
+    d.X.vals = calloc(orig[0].X.rows, sizeof(float *));
+    d.y.vals = calloc(orig[0].y.rows, sizeof(float *));
+    int i;
+    for(i = 0; i < d.X.rows; ++i){
+        d.X.vals[i] = orig[inds[i]].X.vals[i];
+        d.y.vals[i] = orig[inds[i]].y.vals[i];
+    }
+    return d;
+}
+
+data *tile_data(data orig, int divs, int size)
+{
+    data *ds = calloc(divs*divs, sizeof(data));
+    int i, j;
+    #pragma omp parallel for
+    for(i = 0; i < divs*divs; ++i){
+        data d;
+        d.shallow = 0;
+        d.w = orig.w/divs * size;
+        d.h = orig.h/divs * size;
+        d.X.rows = orig.X.rows;
+        d.X.cols = d.w*d.h*3;
+        d.X.vals = calloc(d.X.rows, sizeof(float*));
+
+        d.y = copy_matrix(orig.y);
+        #pragma omp parallel for
+        for(j = 0; j < orig.X.rows; ++j){
+            int x = (i%divs) * orig.w / divs - (d.w - orig.w/divs)/2;
+            int y = (i/divs) * orig.h / divs - (d.h - orig.h/divs)/2;
+            image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[j]);
+            d.X.vals[j] = crop_image(im, x, y, d.w, d.h).data;
+        }
+        ds[i] = d;
+    }
+    return ds;
+}
+
 data resize_data(data orig, int w, int h)
 {
     data d = {0};
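Note: select_data is a row-wise gather across several datasets of identical shape: row i of the result borrows row i of orig[inds[i]], and d.shallow = 1 marks that nothing is copied, so free_data on the result releases only the pointer arrays. A hypothetical sketch of how it can pair with tile_data (variable names and the surrounding setup are assumptions, not from this commit):

/* Hypothetical usage sketch: train on one randomly chosen tile per
 * row. tile_data() makes divs*divs crops of every row; select_data()
 * gathers, for row i, the crop held by tiles[inds[i]]. The result is
 * shallow, so the tile datasets must outlive it. 'train', 'divs' and
 * 'size' are assumed set elsewhere. */
int rows = train.X.rows;
int *inds = calloc(rows, sizeof(int));
int i;
for(i = 0; i < rows; ++i) inds[i] = rand() % (divs*divs);

data *tiles = tile_data(train, divs, size);
data mixed = select_data(tiles, inds);
/* ... train on mixed ... */
free_data(mixed);                        /* shallow: pointer arrays only */
for(i = 0; i < divs*divs; ++i) free_data(tiles[i]);
free(tiles);
free(inds);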
@@ -1181,9 +1231,10 @@ data resize_data(data orig, int w, int h)
     int i;
     d.X.rows = orig.X.rows;
     d.X.cols = w*h*3;
-    d.X.vals = calloc(d.X.rows, sizeof(float));
+    d.X.vals = calloc(d.X.rows, sizeof(float*));
 
     d.y = copy_matrix(orig.y);
+    #pragma omp parallel for
     for(i = 0; i < orig.X.rows; ++i){
         image im = float_to_image(orig.w, orig.h, 3, orig.X.vals[i]);
         d.X.vals[i] = resize_image(im, w, h).data;
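Note: the one-word calloc change above is a real fix, not a cleanup: d.X.vals is an array of float* row pointers, but the old code sized it with sizeof(float), allocating 4 bytes per entry where 8 are needed on 64-bit builds, so the assignment loop could write past the buffer. The added #pragma omp parallel for matches the parallelization pattern used in tile_data.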
@@ -1239,6 +1290,8 @@ data concat_data(data d1, data d2)
     d.shallow = 1;
     d.X = concat_matrix(d1.X, d2.X);
     d.y = concat_matrix(d1.y, d2.y);
+    d.w = d1.w;
+    d.h = d1.h;
     return d;
 }
 
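Note: before this change concat_data returned the merged dataset with d.w and d.h left unset, so the image dimensions were lost on concatenation; the two added assignments propagate them from d1, which matters for any caller that later reads the dimensions (e.g. resize_data above).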
src/utils.c
@@ -91,6 +91,22 @@ void shuffle(void *arr, size_t n, size_t size)
     }
 }
 
+int *random_index_order(int min, int max)
+{
+    int *inds = calloc(max-min, sizeof(int));
+    int i;
+    for(i = min; i < max; ++i){
+        inds[i] = i;
+    }
+    for(i = min; i < max-1; ++i){
+        int swap = inds[i];
+        int index = i + rand()%(max-i);
+        inds[i] = inds[index];
+        inds[index] = swap;
+    }
+    return inds;
+}
+
 void del_arg(int argc, char **argv, int index)
 {
     int i;
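Note: random_index_order builds the identity permutation and then applies a Fisher-Yates shuffle. As written it assumes min == 0: the buffer holds max-min ints, yet both loops index inds[i] for i in [min, max), which runs past the allocation whenever min > 0. Callers that pass min = 0 get a correct uniform shuffle of 0..max-1.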
@@ -583,6 +599,20 @@ int sample_array(float *a, int n)
     return n-1;
 }
 
+int max_int_index(int *a, int n)
+{
+    if(n <= 0) return -1;
+    int i, max_i = 0;
+    int max = a[0];
+    for(i = 1; i < n; ++i){
+        if(a[i] > max){
+            max = a[i];
+            max_i = i;
+        }
+    }
+    return max_i;
+}
+
 int max_index(float *a, int n)
 {
     if(n <= 0) return -1;
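Note: max_int_index is the integer counterpart of the float max_index that follows it: a linear scan returning the position of the largest element, e.g. 1 for the array {3, 9, 2}. Returning -1 for n <= 0 lets callers distinguish an empty input from a valid index.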
src/utils.h
@@ -44,7 +44,6 @@ int constrain_int(int a, int min, int max);
 float rand_uniform(float min, float max);
 float rand_scale(float s);
 int rand_int(int min, int max);
-float sum_array(float *a, int n);
 void mean_arrays(float **a, int n, int els, float *avg);
 float dist_array(float *a, float *b, int n, int sub);
 float **one_hot_encode(float *a, int n, int k);