; docformat = 'rst' ; ; NAME: ; cgOTSU_THRESHOLD ; ; PURPOSE: ; The purpose of this function is to find an optimal threshold for separating ; a bimodal distribution of pixels in an image histogram. The Otsu Threshold method ; is explained here: http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html. ; ;******************************************************************************************; ; ; ; Copyright (c) 2012, by Fanning Software Consulting, Inc. All rights reserved. ; ; ; ; Redistribution and use in source and binary forms, with or without ; ; modification, are permitted provided that the following conditions are met: ; ; ; ; * Redistributions of source code must retain the above copyright ; ; notice, this list of conditions and the following disclaimer. ; ; * Redistributions in binary form must reproduce the above copyright ; ; notice, this list of conditions and the following disclaimer in the ; ; documentation and/or other materials provided with the distribution. ; ; * Neither the name of Fanning Software Consulting, Inc. nor the names of its ; ; contributors may be used to endorse or promote products derived from this ; ; software without specific prior written permission. ; ; ; ; THIS SOFTWARE IS PROVIDED BY FANNING SOFTWARE CONSULTING, INC. ''AS IS'' AND ANY ; ; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES ; ; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ; ; SHALL FANNING SOFTWARE CONSULTING, INC. BE LIABLE FOR ANY DIRECT, INDIRECT, ; ; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ; ; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; ; ; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ; ; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ; ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ; ; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ; ;******************************************************************************************; ; ;+ ; The purpose of this function is to find an optimal threshold for separating ; a bimodal distribution of pixels in an image histogram. The algorithm used is the ; "faster approach" algorithm explained ; `on this web page <http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html>`. ; ; .. image:: cgotsu_threshold.png ; ; :Categories: ; Utility ; ; :Returns: ; The optimal threshold that separates two populations of pixels is returned. ; ; :Params: ; data: in, required, ; The data from which the histogram is created. ; ; :Keywords: ; binsize: in, optional ; The binsize of the histogram. By default, Scott's Choice of bin size for histograms is used:: ; ; binsize = (3.5 * StdDev(data)) / N_Elements(data)^(0.3333) ; ; unless the data is byte type. Then a BINSIZE of 1 is used by default ; ; If BINSIZE in not defined, and NBINS is defined, the BINSIZE is calcuated as:: ; ; binsize = (Max(dataToHistogram) - Min(dataToHistogram)) / (NBINS -1) ; ; While it is pointed out in the HISTOGRAM documentation, it is extremely ; important that the BINSIZE be of the same data type as the data you are going to ; calculate the histogram of. If it is not VERY strange things can happen. I've ; tried to protect you from most of the bad things, but I don't have a high confidence ; level that I have done it for every situation. If you see something that "just don't ; look right", I would check first to see if your data types match. That might solve ; all your problems. ; example: in, optional, type=boolean, default=0 ; Set this keyword if you wish to use the example data from the ; `reference documentation <http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html>`. ; histdata: out, optional ; The output value of the internal HISTOGRAM command. ; l64: in, optional, type=boolean, default=0 ; If set, the return value of HISTOGRAM are 64-bit integers, rather than ; the default 32-bit integers. Set by default for data types greater than or ; equal to 12. ; locations: out, optional ; Starting locations of each bin. (See the HISTOGRAM documentation for details.) ; max: in, optional ; The maximum value to use in calculating input histogram. Equivalent to the MAX keyword ; in the HISTOGRAM documentation. ; min: in, optional ; The minimum value to use in calculating input histogram. Equivalent to the MIN keyword ; in the HISTOGRAM documentation. ; missing: in, optional ; The value that should be represented as "missing" and not used in the histogram. ; Be aware that if the input data is not of type "float" or "double" that the input ; data will be converted to floating point prior to calculating the histogram. ; nan: in, optional, type=boolean, default=0 ; If set, ignore NAN values in calculating and plotting histogram. ; nbins: in, optional, type=integer ; The number of output bins in the histogram. Meaning is slightly different from ; meaning in the HISTOGRAM command. Used only to calculate BINSIZE when BINSIZE is ; not specified. In this case, binsize = rangeofData/(nbins-1). ; omax: out, optional ; The maximum output value used to construct the histogram. (See HISTOGRAM documentation.) ; omin: out, optional ; The minimum output value used to construct the histogram. (See HISTOGRAM documentation.) ; plotit: in, optional, type=boolean, default=0 ; If this keyword is set, a histogram of the data is plotted along with a plot of the ; between class variance to show the selected threshold. ; reverse_indices: out, optional ; The list of reverse indices returned from the HISTOGRAM command. (See HISTOGRAM documentation.) ; ; :Examples: ; Set the `Example` keyword to use the data from the reference page. ; ; :Author: ; FANNING SOFTWARE CONSULTING:: ; David W. Fanning ; 1645 Sheely Drive ; Fort Collins, CO 80526 USA ; Phone: 970-221-0438 ; E-mail: david@idlcoyote.com ; Coyote's Guide to IDL Programming: http://www.idlcoyote.com ; ; :History: ; Change History:: ; Written by: David W. Fanning, 13 November 2012, from a program named OTSU_THRESHOLD by Carl Salvaggio and ; modified by Gianguido Cianci. ; The OTSU_THRESHOLD algorithm used previously made many assumptions about the data. The algorithm used here ; has been completely rewritten to comply with the values in the reference page and to avoid making ; assumptions about the data used to create the histogram. 21 November 2012. DWF. ; Modified to set L64 keyword if data type GE 14 (suggested by user). 22 November 2012. DWF. ; Modified the threshold value to use DIndGen instead of IndGen, which was causing incorrect ; results with integer data. 24 November 2012. DWF. ; ; :Copyright: ; Copyright (c) 2012, Fanning Software Consulting, Inc. ;- FUNCTION cgOTSU_THRESHOLD, $ ; The program name. data, $ ; The data to threshold. BINSIZE=binsize, $ ; The histogram bin size. EXAMPLE=example, $ ; Set this keyword to see the reference page example. HISTDATA=histdata, $ ; The output of the HISTOGRAM command. L64=l64, $ ; Input for HISTOGRAM. LOCATIONS=locations, $ ; The histogram bin locations. MAX=max, $ ; The maximum value to HISTOGRAM. MIN=min, $ ; The minimum value to HISTOGRAM. MISSING=missing, $ ; The value that indicates "missing" data to be excluded from the histgram. NAN=nan, $ ; Check for NAN. NBINS=nbins, $ ; The number of bins to display. OMAX=omax, $ ; The maximum value of the histogram on output. OMIN=omin, $ ; The minimum value of the histogram on output. PLOTIT=plotit, $ ; Set this keyword to see the results of the thresholding algorithm. REVERSE_INDICES=ri ; The reverse indices of the histogram. Compile_Opt idl2 ; Error handling. Catch, theError IF theError NE 0 THEN BEGIN Catch, /Cancel ok = cgErrorMsg() IF N_Elements(nancount) EQ 0 THEN BEGIN IF N_Elements(_data) NE 0 THEN data = Temporary(_data) ENDIF ELSE BEGIN IF nancount EQ 0 THEN BEGIN IF N_Elements(_data) NE 0 THEN data = Temporary(_data) ENDIF ENDELSE RETURN, -9999 ENDIF ; Need data or the EXAMPLE keyword to continue. IF N_Elements(data) EQ 0 && ~Keyword_Set(example) THEN BEGIN Message, 'Data values to threshold are required.' ENDIF ; Are we doing an example? Use the data from the reference page at ; http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html. IF Keyword_Set(example) THEN BEGIN data = [Replicate(0,8), Replicate(1,7), Replicate(2,2), Replicate(3,6), Replicate(4,9), Replicate(5,4)] binsize = 1 ENDIF ; Get the data type. Important to match data type with binsize type. Otherwise ; strange things occur in the Histogram command. dataType = Size(data, /TYPE) ; If this is byte data, then use a BINSIZE of 1, unless instructed otherwise. IF dataType EQ 1 THEN BEGIN IF (N_Elements(binsize) EQ 0) && (N_Elements(nbins) EQ 0) THEN binsize = 1B ENDIF ; If the data type is 14 or above, set the L64 keyword. Necessary to give enough ; precision in the Otsu calculations. IF dataType GE 14 THEN L64 = 1 ; Check the data for NANs and alert the user if the NAN keyword is not set. IF dataType EQ 4 OR datatype EQ 5 THEN BEGIN goodIndices = Where(Finite(data), count, NCOMPLEMENT=nancount, COMPLEMENT=nanIndices) IF nancount GT 0 THEN BEGIN IF ~Keyword_Set(nan) THEN BEGIN Message, 'NANs found in the data. NAN keyword is set to 1.', /INFORMATIONAL nan = 1 ENDIF ENDIF ENDIF ; The only sensible way to proceed is to make a copy of the data. Otherwise, I'll have ; a devil of a time putting it back together again at the end. There is a bug in ; HISTOGRAM when using BYTE data, so convert that here IF N_Elements(_data) EQ 0 THEN BEGIN IF Size(data, /TNAME) EQ 'BYTE' THEN BEGIN _data = Fix(data) ENDIF ELSE BEGIN _data = data ENDELSE ENDIF ; If you have any "missing" data, then the data needs to be converted to float ; and the missing data set to F_NAN. IF N_Elements(missing) NE 0 THEN BEGIN missingIndices = Where(_data EQ missing, missingCount) IF missingCount GT 0 THEN BEGIN CASE datatype OF 4: _data[missingIndices] = !Values.F_NAN 5: _data[missingIndices] = !Values.D_NAN ELSE: BEGIN _data = Float(_data) dataType = 4 _data[missingIndices] = !Values.F_NAN END ENDCASE nan = 1 ENDIF ELSE BEGIN IF missingCount EQ N_Elements(_data) THEN $ Message, 'All values are "missing"!' ENDELSE ENDIF ; Check for histogram keywords. IF N_Elements(binsize) EQ 0 THEN BEGIN range = Max(_data, /NAN) - Min(_data, /NAN) IF N_Elements(nbins) EQ 0 THEN BEGIN ; Scott's Choice binsize = (3.5D * StdDev(_data, /NAN))/N_Elements(_data)^(1./3.0D) IF (dataType LE 3) OR (dataType GE 12) THEN binsize = Round(binsize) > 1 binsize = Convert_To_Type(binsize, dataType) ENDIF ELSE BEGIN binsize = range / (nbins -1) IF dataType LE 3 THEN binsize = Round(binsize) > 1 binsize = Convert_To_Type(binsize, dataType) ENDELSE ENDIF ELSE BEGIN IF Size(binsize, /TYPE) NE dataType THEN BEGIN IF dataType LE 3 THEN binsize = Round(binsize) > 1 binsize = Convert_To_Type(binsize, dataType) ENDIF ENDELSE IF N_Elements(min) EQ 0 THEN min = Min(_data, NAN=nan) IF N_Elements(max) EQ 0 THEN max = Max(_data, NAN=nan) ; Calculate the histogram. histdata = Histogram(_data, $ BINSIZE=binsize, $ L64=l64, $ MAX=max, $ MIN=min, $ NAN=nan, $ LOCATIONS=locations, $ OMAX=omax, $ OMIN=omin, $ REVERSE_INDICES=ri) ; Lot's of bad things can happen next. Let's pretend we don't know about them. except = !Except !Except = 0 ; The threshold values to evaluate. thresholds = DIndGen(N_Elements(histdata)) * binsize + oMin ; Create a cumulative distribution to calculate the weighting factors. ; Subscripting of the background weights and addition of a 0 value ; is necessary to conform with the outputs in the reference documenation. ; I presume it is because the first threshold should be on the near side ; of the first bin, rather than on the far side. cdf = Total(histdata, /DOUBLE, /CUMULATIVE) reverseCDF = Total(Reverse(histdata), /DOUBLE, /CUMULATIVE) Wb = [0,cdf[0:N_Elements(cdf)-2]] / Total(histdata) Wf = Reverse(reverseCDF / Total(histdata)) ; Find the means. mu_b = Total(histdata * thresholds, /DOUBLE, /CUMULATIVE) / cdf mu_b = [0, mu_b[0:N_Elements(mu_b)-2]] mu_f = Reverse(Total(Reverse(histdata) * Reverse(thresholds), /DOUBLE, /CUMULATIVE) / reverseCDF) ; Calculate the Between-Class variance. betweenClassVariance = Wb * Wf * (mu_b - mu_f)^2 ; The threshold is found by locating the maximum value and ; obtaining the index into the array. maximumVariance = Max(betweenClassVariance, thresholdIndex) threshold = thresholdIndex*binsize + oMin ; Useful printouts if we are doing the example. IF Keyword_Set(example) THEN BEGIN Print, 'Wb: ', Wb Print, 'Wf: ', Wf Print, 'Mu_b: ', mu_b Print, 'Mu_f: ', mu_f Print, 'Variance: ', betweenClassVariance Print, 'Threshold: ', threshold cgDisplay, Title='Example OTSU Threshold Method', /Free !P.Multi = [0,1,2] cgHistoplot, _data, Binsize=binsize, /Fill cgPlots, [threshold, threshold], !Y.CRange, Color='blue', Thick=2 cgPlot, betweenClassVariance, Title='Between Class Variance' cgPlots, [threshold, threshold], !Y.CRange, Color='blue', Thick=2 cgText, 0.23, 2.60, 'Threshold: ' + String(threshold, Format='(I0)'), Color='blue', Font=0 !P.Multi = 0 ENDIF ; Need a plot? IF Keyword_Set(plotit) THEN BEGIN cgDisplay, Title='OTSU Threshold Results', /Free !P.Multi = [0,1,2] cgHistoplot, _data, $ BINSIZE=binsize, $ L64=l64, $ LOCATIONS=locations, $ MAXINPUT=max, $ MININPUT=min, $ NAN=nan, $ /Fill cgPlots, [threshold, threshold], !Y.CRange, Color='blue', Thick=2 cgPlot, locations, betweenClassVariance, Title='Between Class Variance Threshold: ' + $ String(threshold,Format='(F0.2)'), XStyle=1 cgPlots, [threshold, threshold], !Y.CRange, Color='blue', Thick=2 !P.Multi = 0 ENDIF ; Clean up. !Except = except ; Return result. RETURN, threshold END