Hello, as I often manage 3D point clouds, I find often helpful applying a statistical outlier removal filter (SOR) as the one found in the famous PCL libraries, so I’m posting here a function I wrote some time ago for doing just this.

Parameters:

  • p: a vector of 3d points, which must be in float precision.
  • nn -> Nearest Neighbor to be computed
  • std_threshold -> the multiplication factor of the standard deviation
  • app_steps -> number of time that the filter must run, useful when dealing with very noisy data (use a high threshold and repeat the filter multiple times)
  • exact -> whether or not the computation of nearest neighbor must be perfect or can be done with the FLANN library (thus obtaining a huge speed up)
  • flann_checks -> number of time the flann routine will check to be in the right path: it can be used as a parameter to improve the results of the nearest neighbor, that is not exact most of the time.

Output:

  • a vector of boolean indicating whether or not a point is an outlier (it is when the corresponding boolean value is FALSE)

I’ve run the code for various amount of points, finding out that with the predefined variables (1 filter step and 160 as a flann_check), when having less than 1400 points, the exact option gives best speed performance and assures a better solution.

std::vector<bool> SOR(std::vector<cv::Point3f> p, int nn, double std_threshold, int app_steps = 1, bool exact = true, int flann_checks = 160)
{
    std::vector<bool> ret(p.size());
    std::vector<float> dist1(p.size());
    std::vector<float> dist2(p.size());
    
    std::vector<int> indices(nn + 1);
    std::vector<float> distances(nn + 1);
    cv::Mat point(1, 3, CV_32FC1);
    
    cv::flann::Index kd_tree(cv::Mat(p).reshape(1), cv::flann::KDTreeIndexParams(), cvflann::FLANN_DIST_L2);
    
    if (exact)
    {
        for (int i = 0; i < p.size(); i++)
        {
        ret[i] = true;
    
            for (int j = 0; j < p.size(); j++)
            {
                if (j == i)
                    dist2[j] = 0.0f;
                else
                    dist2[j] = cv::norm(p[i] - p[j]);
            }
            
            std::sort(dist2.begin(), dist2.end());
            
            dist1[i] = 0.0;
            for (int j = 1; j < nn + 1; j++)
                dist1[i] += dist2[j];
            dist1[i] /= nn;
        }
    }
    else
    {
        for (int i = 0; i < p.size(); i++)
        {
            ret[i] = true;
            point.at<float>(0) = p[i].x;
            point.at<float>(1) = p[i].y;
            point.at<float>(2) = p[i].z;
            kd_tree.knnSearch(point, indices, distances, nn + 1, cv::flann::SearchParams(flann_checks));
    
            dist1[i] = 0.0f;
            for (int j = 1; j < nn + 1; j++)
                dist1[i] += sqrtf(distances[j]);
            dist1[i] /= (float)nn;
        }
    }
    
    for (int j = 0; j < app_steps; j++)
    {
        cv::Mat mean_m, stddev_m;
        cv::meanStdDev(dist1, mean_m, stddev_m, ret);
        float distance_threshold = mean_m.at<double>(0) + std_threshold * stddev_m.at<double>(0);
    
        for (int i = 0; i < p.size(); i++)
        {
            if (ret[i])
                ret[i] = dist1[i] < distance_threshold;
        }
    }
    
    return ret;
}