In this paper, we establish a connection between image processing, visual perception, and deep learning by introducing a mathematical model inspired by visual perception from which neural network layers and image processing models for color correction can be derived. Our model is inspired by the geometry of visual perception and couples a geometric model for the organization of some neurons in the visual cortex with a geometric model of color perception. More precisely, the model is a combination of a Wilson-Cowan equation describing the activity of neurons responding to edges and textures in the area V1 of the visual cortex and a Retinex model of color vision. For some particular activation functions, this yields a color correction model which processes simultaneously edges/textures, encoded into a Riemannian metric, and the color contrast, encoded into a nonlocal covariant derivative. Then, we show that the proposed model can be assimilated to a residual layer provided that the activation function is nonlinear and to a convolutional layer for a linear activation function. Finally, we show the accuracy of the model for deep learning by testing it on the MNIST dataset for digit classification.

},
author = {Thomas Batard and Eduard Ramon Maldonado and Gabriele Steidl and Marcelo Bertalm{\'\i}o}
}

% Entry 210: the acceptance status is kept in the note field so that the
% journal field holds only the venue name.
@article{210,
  author   = {Thomas Batard and Marcelo Bertalm{\'\i}o},
  title    = {A Geometric Model of Brightness Perception and its Application to Color Images Correction},
  journal  = {Journal of Mathematical Imaging and Vision},
  year     = {2018},
  note     = {Accepted for publication},
  abstract = {Human perception involves many features like contours, shapes, textures, and colors to name a few. Whereas several geometric models for contours, shapes and textures perception have been proposed, the geometry of color perception has received very little attention, possibly due to the fact that our perception of colors is still not fully understood. Nonetheless, there exists a class of mathematical models, gathered under the name Retinex, that aim at modeling the color perception of an image, that are inspired by psychophysical/physiological knowledge about color perception, and that can geometrically be viewed as the averaging of perceptual distances between image pixels.

Some of the Retinex models turn out to be associated to an efficient image processing technique for the correction of camera output images.

The aim of this paper is to show that this image processing technique can be improved by including more properties of the human visual system. To that purpose, we first present a generalization of the perceptual distance between image pixels by considering the parallel transport map associated to a covariant derivative on a vector bundle, and from which can be derived a new image processing model for color images correction. Then, we show that the family of covariant derivatives constructed in [T. Batard and N. Sochen, J. Math. Imaging Vision, 48(3) (2014), pp. 517-543] can model some color appearance phenomena related to brightness perception. Finally, we conduct experiments in which we show that the image processing techniques induced by these covariant derivatives outperform the original approach.}
}
@article{206,
  author   = {Marina Martinez-Garc{\'\i}a and Praveen Cyriac and Thomas Batard and Marcelo Bertalm{\'\i}o and Jes{\'u}s Malo},
  title    = {Derivatives and Inverse of Cascaded Linear+Nonlinear Neural Models},
  journal  = {PLOS ONE},
  year     = {2017},
  url      = {https://arxiv.org/abs/1711.00526},
  abstract = {In vision science, cascades of Linear+Nonlinear transforms are very successful in modeling a number of perceptual experiences [1]. However, the conventional literature is usually too focused on only describing the forward input-output transform. Instead, in this work we present the mathematics of such cascades beyond the forward transform, namely the Jacobian matrices and the inverse. These analytic results are important for three reasons: (a) they are strictly necessary in new experimental methods based on the synthesis of visual stimuli with interesting geometrical properties, (b) they are convenient to learn the model from classical experiments or alternative goal optimization, and (c) they are a promising model-based alternative to blind machine-learning methods for neural decoding. Moreover, the statistical properties of the neural model are more intuitive by using this kind of vector formulation. The theory is checked by building and testing a vision model that actually follows the modular program suggested in [1]. Our derivable and invertible model consists of a cascade of modules that account for brightness, contrast, energy masking, and wavelet masking. To stress the generality of this modular setting we show examples where some of the canonical Divisive Normalization modules are substituted by equivalent modules such as the Wilson-Cowan interaction model [2, 3] (at the V1 cortex) or a tone-mapping model [4] (at the retina). In the Discussion we address three illustrative applications. First, we show how the Jacobian (w.r.t. the input) plays a major role in setting the model by allowing novel psychophysics based on the geometry of the neural representation (as in [5]). Second, we show how the Jacobian (w.r.t. the parameters) can be used to find the model that better reproduces classical psychophysics of image distortion. In fact, thanks to the presented derivatives, this cascade of isomorphic canonical modules has been psychophysically tuned to work together for the first time. Third, we show how the analytic inverse may improve regression-based visual brain decoding.}
}
@article{187,
  author   = {Marcelo Bertalm{\'\i}o and Praveen Cyriac and Thomas Batard and Marina Martinez-Garc{\'\i}a and Jes{\'u}s Malo},
  title    = {The {Wilson-Cowan} Model Describes Contrast Response and Subjective Distortion},
  journal  = {Vision Sciences Society Annual Meeting},
  year     = {2017},
  abstract = {The Wilson-Cowan equations were originally proposed to describe the low-level dynamics of neural populations (Wilson\&Cowan 1972). These equations have been extensively used in modelling the oscillations of cortical activity (Cowan et al. 2016). However, due to their low-level nature, very few works have attempted connections to higher level psychophysics (Herzog et al. 2003, Hermens et al. 2005) and, to the best of our knowledge, they have not been used to predict contrast response curves or subjective image quality. Interestingly (Bertalm{\'\i}o\&Cowan 2009) showed that Wilson-Cowan models may lead to (high level) color constancy. Moreover, these models may have positive statistical effects similarly to Divisive Normalization, which is the canonical choice to understand contrast response (Watson\&Solomon 1997, Carandini\&Heeger 2012): while Divisive Normalization reduces redundancy due to predictive coding (Malo\&Laparra 2010), Wilson-Cowan leads to local histogram equalization (Bertalm{\'\i}o 2014), another route to increase channel capacity.

Here we show that the functional (statistical) similarities between Wilson-Cowan and Divisive Normalization actually hold and may be extended to contrast perception. Specifically, first we fitted the Wilson-Cowan model using a procedure reported for Divisive Normalization: following (Watson\&Malo 2002, Laparra\&Malo 2010), we maximized the correlation with human opinion in quality assessment. Secondly, we used the resulting model to predict the visibility of textured patterns on top of backgrounds of different frequencies and contrasts as in classical masking experiments. Finally, we checked the redundancy reduction of Wilson-Cowan and Divisive Normalization in the same way (as in Malo\&Laparra 2010). Results show that (1) Wilson-Cowan is as good as Divisive Normalization in reproducing image distortion psychophysics, (2) Wilson-Cowan dynamics induces saturating responses that attenuate with the contrast of the background, particularly when the background resembles the test; and (3) mutual information between V1-like responses after the Wilson-Cowan interaction decreases similarly as in Divisive Normalization.}
}
% Entry 167: the acceptance status is kept in the note field so that the
% journal field holds only the venue name.
@article{167,
  author   = {Thomas Batard and Marcelo Bertalm{\'\i}o},
  title    = {A Class of Nonlocal Variational Problems on a Vector Bundle for Color Image Local Contrast Reduction/Enhancement},
  journal  = {Geometry, Imaging and Computing},
  year     = {2016},
  note     = {Accepted for publication},
  abstract = {We extend two existing variational models from the Euclidean space to a vector bundle over a Riemannian manifold. The Euclidean models, dedicated to regularize or enhance some color image features, are based on the concept of nonlocal gradient operator acting on a function of the Euclidean space. We then extend these models by generalizing this operator to a vector bundle over a Riemannian manifold with the help of the parallel transport map associated to some class of covariant derivatives. Through the dual formulations of the proposed models, we obtain the expressions of their solutions, which exhibit the functional spaces that describe the image features. Finally, for a well-chosen covariant derivative and its nonlocal extension, the proposed models perform local contrast modification (reduction or enhancement) and experiments show that they preserve more the aspect of the original image than the Euclidean models do while modifying equally its contrast.}
}
@article{172,
  author  = {Marcelo Bertalm{\'\i}o and Thomas Batard and Jihyun Kim},
  title   = {Correcting for Induction Phenomena on Displays of Different Size},
  journal = {Vision Sciences Society Annual Meeting},
  year    = {2016},
  url     = {http://f1000research.com/posters/5-1215}
}
% Entry 160: the acronym RAW is brace-protected so that bibliography styles
% which sentence-case titles do not lowercase it.
@conference{160,
  author    = {Gabriela Ghimpeteanu and Thomas Batard and Tamara Seybold and Marcelo Bertalm{\'\i}o},
  title     = {Local denoising applied to {RAW} images may outperform non-local patch-based methods applied to the camera output},
  booktitle = {IS\&T Electronic Imaging Conference},
  year      = {2016},
  abstract  = {State-of-the-art denoising methods achieve impressive results, even for large noise levels. However, they can not be implemented in camera hardware, mainly due to the fact that they are computationally too intensive. The aim of this paper is then to show that we can obtain comparable denoising results to the ones obtained with state-of-art methods by inserting a well-chosen fast denoising method at the right location in the camera processing pipeline. We evaluate our results visually and with respect to objective measures.}
}
@conference{169,
  author    = {Gabriela Ghimpeteanu and David Kane and Thomas Batard and Stacey Levine and Marcelo Bertalm{\'\i}o},
  title     = {Local Denoising Based on Curvature Smoothing can Visually Outperform Non-local Methods on Photographs with Actual Noise},
  booktitle = {IEEE International Conference on Image Processing},
  year      = {2016},
  abstract  = {We propose a fast, local denoising method where the Euclidean curvature of the noisy image is approximated in a regularizing manner and a clean image is reconstructed from this smoothed curvature. User preference tests show that when denoising real photographs with actual noise our method produces results with the same visual quality as the more sophisticated, non-local algorithms Non-local Means and BM3D, but at a fraction of their computational cost. These tests also highlight the limitations of objective image quality metrics like PSNR and SSIM, which correlate poorly with user preference.}
}
@article{173,
  author   = {Jihyun Kim and Thomas Batard and Marcelo Bertalm{\'\i}o},
  title    = {Retinal Processing Optimizes Contrast Coding},
  journal  = {Vision Sciences Society Annual Meeting},
  year     = {2016},
  url      = {http://f1000research.com/posters/5-1464},
  abstract = {The properties of human contrast perception show a close correlation to the responses of retinal ganglion cells (Kelly, 1977; Lee, 1990), whose spatial processing properties (the isotropic center-surround processing) are shaped by the local feedback from interneurons (horizontal / amacrine cells) to the feed-forward cells (photoreceptor, and bipolar / ganglion cells). In the current work we investigated the computational structure of this retinal feedback system. We first identified a simple form of a system of differential equations that realizes the retinal feedback architecture and analyzed its steady-state behaviour to a static stimulus input. Three main conclusions may be derived from the results of the analysis. Firstly, the system of equations preserves the ability to predict some human contrast perception properties such as spatial-frequency dependent contrast sensitivity and brightness induction (contrast and assimilation) as other existing retinal models predict (Kim \& Bertalm{\'\i}o, 2015; submitted; van Hateren, 2007; Wilson, 1997), thus showing a minimum computational structure to emulate human contrast perception engendered at the retina. Secondly, the steady-state response of the system can be obtained in a single pass by convolving the original input with a single kernel (a combined product of different extents of receptive-fields of the retinal cells) and therefore our work proposes a computationally efficient way of modeling retinal cell responses and the resulting human contrast perception. Finally, finding the steady state solution is mathematically equivalent to solving an optimization problem of maximizing the spatial contrast in the encoded signals while being faithful to the local light intensity of the input stimulus, which suggests interesting connections with efficient coding theories and computational neuroscience models like the Wilson-Cowan equations (see Bertalm{\'\i}o, 2014). Our results shed light on the computational goal of the feedback architecture in the retinal circuit: an optimized representation of the spatial contrast in the incoming light pattern.}
}
@article{146,
  author   = {Gabriela Ghimpeteanu and Thomas Batard and Marcelo Bertalm{\'\i}o and Stacey Levine},
  title    = {A Decomposition Framework for Image Denoising Algorithms},
  journal  = {IEEE Transactions on Image Processing},
  year     = {2015},
  abstract = {In this paper we consider an image decomposition model that provides a novel framework for image denoising. The model computes the components of the image to be processed in a moving frame that encodes its local geometry (directions of gradients and level-lines). Then, the strategy we develop is to denoise the components of the image in the moving frame in order to preserve its local geometry, which would have been more affected if processing the image directly. Experiments on a whole image database tested with several denoising methods show that this framework can provide better results than denoising the image directly, both in terms of PSNR and SSIM [27] metrics.}
}
@conference{130,
  author    = {Thomas Batard and Marcelo Bertalm{\'\i}o},
  title     = {Duality Principle for Image Regularization and Perceptual Color Correction Models},
  booktitle = {Proceedings of International Conference on Scale Space and Variational Methods in Computer Vision (SSVM)},
  year      = {2015},
  isbn      = {978-3-319-18461-6},
  doi       = {10.1007/978-3-319-18461-6_36}
}
@article{105,
  author   = {Thomas Batard and Marcelo Bertalm{\'\i}o},
  title    = {On Covariant Derivatives and Their Applications to Image Regularization},
  journal  = {SIAM Journal on Imaging Sciences (SIIMS)},
  year     = {2014},
  doi      = {10.1137/140954039},
  abstract = {We present a generalization of the Euclidean and Riemannian gradient operators to a vector bundle, a geometric structure generalizing the concept of manifold. One of the key ideas is to replace the standard differentiation of a function by the covariant differentiation of a section. Dealing with covariant derivatives satisfying the property of compatibility with vector bundle metrics, we construct generalizations of existing mathematical models for image regularization that involve the Euclidean gradient operator, namely the linear scale-space and the Rudin-Osher-Fatemi denoising model. For well-chosen covariant derivatives, we show that our denoising model outperforms state-of-the-art variational denoising methods of the same type both in terms of PSNR and Q-index [45].}
}

% Entry 104: the award mention is kept in the note field so that the
% booktitle field holds only the conference name.
@conference{104,
  author    = {Gabriela Ghimpeteanu and Thomas Batard and Marcelo Bertalm{\'\i}o and Stacey Levine},
  title     = {Denoising an Image by Denoising its Components in a Moving Frame},
  booktitle = {International Conference on Image and Signal Processing (ICISP)},
  year      = {2014},
  note      = {Best Paper Award},
  abstract  = {In this paper, we provide a new non-local method for image denoising. The key idea we develop is to denoise the components of the image in a well-chosen moving frame instead of the image itself. We prove the relevance of our approach by showing that the PSNR of a grayscale noisy image is lower than the PSNR of its components. Experiments show that applying the Non Local Means algorithm of Buades et al. [5] on the components provides better results than applying it directly on the image.}
}
@article{111,
  author   = {Praveen Cyriac and Thomas Batard and Marcelo Bertalm{\'\i}o},
  title    = {A Non Local Variational Formulation for the Improvement of Tone Mapped Images},
  journal  = {SIAM Journal on Imaging Sciences (SIIMS)},
  year     = {2014},
  doi      = {10.1137/140967209},
  abstract = {Due to technical limitations, common display devices can only reproduce images having a low range of intensity values (dynamic range). As a consequence, the dynamic range of images encoding real world scenes, which is large, has to be compressed in order for them to be reproduced on a common display, and this technique is called tone mapping. Because there is no ground truth to compare with, evaluation of a tone mapped image has to be done by comparing with the original high dynamic range image. As standard metrics based on pixel-wise comparisons are not suitable for comparing images of different dynamic range, non local perceptual based metrics are commonly used. We propose a general method for optimizing tone mapped images with respect to a given non local metric. In particular, if the metric is perceptual, i.e. it involves perceptual concepts, we provide an adequate minimization strategy. Experiments on a particular perceptual metric tested with different tone mapped images provided by several tone mapping operators validate our approach.}
}
% Entry 78: month uses the predefined three-letter macro; the year is carried
% by the year field alone.
@conference{78,
  author    = {Thomas Batard and Marcelo Bertalm{\'\i}o},
  title     = {Generalized Gradient on Vector Bundle - Application to Image Denoising},
  booktitle = {Proceedings of International Conference on Scale Space and Variational Methods in Computer Vision (SSVM-2013), Austria},
  year      = {2013},
  month     = jun,
  abstract  = {We introduce a gradient operator that generalizes the Euclidean and Riemannian gradients. This operator acts on sections of vector bundles and is determined by three geometric data: a Riemannian metric on the base manifold, a Riemannian metric and a covariant derivative on the vector bundle. Under the assumption that the covariant derivative is compatible with the metric of the vector bundle, we consider the problems of minimizing the L2 and L1 norms of the gradient. In the L2 case, the gradient descent for reaching the solutions is a heat equation of a differential operator of order two called connection Laplacian. We present an application to color image denoising by replacing the regularizing term in the Rudin-Osher-Fatemi (ROF) denoising model by the L1 norm of a generalized gradient associated with a well-chosen covariant derivative. Experiments are validated by computations of the PSNR and Q-index.}
}

% NOTE(review): in entry 95 the second abstract paragraph ("Given any metric
% ...") and the author list describe tone-mapping optimization rather than
% histogram matching; they look like the remains of a separate entry whose
% header was lost. TODO confirm against the publication list and split the
% record. Both parts are kept here exactly as found, apart from typo repairs.
@conference{95,
  author    = {Praveen Cyriac and Thomas Batard and Marcelo Bertalm{\'\i}o},
  title     = {Harmonic Flow for Histogram Matching},
  booktitle = {Geometric Computation for Computer Vision GCCV, Guanajuato, Mexico},
  year      = {2013},
  abstract  = {We present a method to perform histogram matching between two color images based on the concept of harmonic mapping between Riemannian manifolds. The key idea is to associate the histogram of a color image to a Riemannian manifold. In this context, the energy of the matching between the two images is measured by the Dirichlet energy of the mapping between the Riemannian manifolds. Then, we assimilate optimal matchings to critical points of the Dirichlet energy. Such points are called harmonic maps. As there is no explicit expression for harmonic maps in general, we use a gradient descent flow with boundary condition to reach them, that we call harmonic flow. We present an application to color transfer, however many others applications can be envisaged using this general framework.

Given any metric that compares images of different dynamic range, we propose a method to reduce their distance with respect to this metric. The key idea is to consider the metric as a non local operator. Then, we transform the problem of distance reduction into a non local variational problem. In this context, the low dynamic range image having the smallest distance with a given high dynamic range is the minimum of a suitable energy, and can be reached through a gradient descent algorithm. Dealing with an appropriate metric, we present an application to Tone Mapping Operator (TMO) optimization. We apply our gradient descent algorithm, where the initial conditions are Tone Mapped (TM) images. Experiments show that our algorithm does reduce the distance of the TM images with the high dynamic range source images, meaning that our method improves the corresponding TMOs.}
}