Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
thresholder.cpp
Go to the documentation of this file.
1 
2 // File: thresholder.cpp
3 // Description: Base API for thresholding images in tesseract.
4 // Author: Ray Smith
5 // Created: Mon May 12 11:28:15 PDT 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "allheaders.h"
21 
22 #include "thresholder.h"
23 
24 #include <string.h>
25 
26 #include "img.h"
27 #include "otsuthr.h"
28 
29 namespace tesseract {
30 
32  : pix_(NULL),
33  image_data_(NULL),
34  image_width_(0), image_height_(0),
35  image_bytespp_(0), image_bytespl_(0),
36  scale_(1), yres_(300), estimated_res_(300) {
37  SetRectangle(0, 0, 0, 0);
38 }
39 
41  Clear();
42 }
43 
44 // Destroy the Pix if there is one, freeing memory.
46  if (pix_ != NULL) {
47  pixDestroy(&pix_);
48  pix_ = NULL;
49  }
50  image_data_ = NULL;
51 }
52 
53 // Return true if no image has been set.
55  if (pix_ != NULL)
56  return false;
57  return image_data_ == NULL;
58 }
59 
60 // SetImage makes a copy of only the metadata, not the underlying
61 // image buffer. It promises to treat the source as read-only in either case,
62 // but in return assumes that the Pix or image buffer remain valid
63 // throughout the life of the ImageThresholder.
64 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
65 // Palette color images will not work properly and must be converted to
66 // 24 bit.
67 // Binary images of 1 bit per pixel may also be given but they must be
68 // byte packed with the MSB of the first byte being the first pixel, and a
69 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
70 void ImageThresholder::SetImage(const unsigned char* imagedata,
71  int width, int height,
72  int bytes_per_pixel, int bytes_per_line) {
73  if (pix_ != NULL)
74  pixDestroy(&pix_);
75  pix_ = NULL;
76  image_data_ = imagedata;
77  image_width_ = width;
78  image_height_ = height;
79  image_bytespp_ = bytes_per_pixel;
80  image_bytespl_ = bytes_per_line;
81  scale_ = 1;
82  estimated_res_ = yres_ = 300;
83  Init();
84 }
85 
86 // Store the coordinates of the rectangle to process for later use.
87 // Doesn't actually do any thresholding.
88 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
89  rect_left_ = left;
90  rect_top_ = top;
91  rect_width_ = width;
92  rect_height_ = height;
93 }
94 
95 // Get enough parameters to be able to rebuild bounding boxes in the
96 // original image (not just within the rectangle).
97 // Left and top are enough with top-down coordinates, but
98 // the height of the rectangle and the image are needed for bottom-up.
99 void ImageThresholder::GetImageSizes(int* left, int* top,
100  int* width, int* height,
101  int* imagewidth, int* imageheight) {
102  *left = rect_left_;
103  *top = rect_top_;
104  *width = rect_width_;
105  *height = rect_height_;
106  *imagewidth = image_width_;
107  *imageheight = image_height_;
108 }
109 
110 // NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its
111 // input, so the source pix may be pixDestroyed immediately after.
112 void ImageThresholder::SetImage(const Pix* pix) {
113  image_data_ = NULL;
114  if (pix_ != NULL)
115  pixDestroy(&pix_);
116  Pix* src = const_cast<Pix*>(pix);
117  int depth;
118  pixGetDimensions(src, &image_width_, &image_height_, &depth);
119  // Convert the image as necessary so it is one of binary, plain RGB, or
120  // 8 bit with no colormap.
121  if (depth > 1 && depth < 8) {
122  pix_ = pixConvertTo8(src, false);
123  } else if (pixGetColormap(src)) {
124  pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
125  } else {
126  pix_ = pixClone(src);
127  }
128  depth = pixGetDepth(pix_);
129  image_bytespp_ = depth / 8;
130  image_bytespl_ = pixGetWpl(pix_) * sizeof(l_uint32);
131  scale_ = 1;
132  estimated_res_ = yres_ = pixGetYRes(src);
133  Init();
134 }
135 
136 // Threshold the source image as efficiently as possible to the output Pix.
137 // Creates a Pix and sets pix to point to the resulting pointer.
138 // Caller must use pixDestroy to free the created Pix.
140  if (pix_ != NULL) {
141  if (image_bytespp_ == 0) {
142  // We have a binary image, so it just has to be cloned.
143  *pix = GetPixRect();
144  } else {
145  if (image_bytespp_ == 4) {
146  // Color data can just be passed direct.
147  const uinT32* data = pixGetData(pix_);
148  OtsuThresholdRectToPix(reinterpret_cast<const uinT8*>(data),
150  } else {
151  // Convert 8-bit to IMAGE and then pass its
152  // buffer to the raw interface to complete the conversion.
153  IMAGE temp_image;
154  temp_image.FromPix(pix_);
155  OtsuThresholdRectToPix(temp_image.get_buffer(),
157  COMPUTE_IMAGE_XDIM(temp_image.get_xsize(),
158  temp_image.get_bpp()),
159  pix);
160  }
161  }
162  return;
163  }
164  if (image_bytespp_ > 0) {
165  // Threshold grey or color.
167  } else {
168  RawRectToPix(pix);
169  }
170 }
171 
172 // Common initialization shared between SetImage methods.
175 }
176 
177 // Get a clone/copy of the source image rectangle.
178 // The returned Pix must be pixDestroyed.
179 // This function will be used in the future by the page layout analysis, and
180 // the layout analysis that uses it will only be available with Leptonica,
181 // so there is no raw equivalent.
183  if (pix_ != NULL) {
184  if (IsFullImage()) {
185  // Just clone the whole thing.
186  return pixClone(pix_);
187  } else {
188  // Crop to the given rectangle.
189  Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
190  Pix* cropped = pixClipRectangle(pix_, box, NULL);
191  boxDestroy(&box);
192  return cropped;
193  }
194  }
195  // The input is raw, so we have to make a copy of it.
196  Pix* raw_pix;
197  RawRectToPix(&raw_pix);
198  return raw_pix;
199 }
200 
201 // Get a clone/copy of the source image rectangle, reduced to greyscale.
202 // The returned Pix must be pixDestroyed.
203 // This function will be used in the future by the page layout analysis, and
204 // the layout analysis that uses it will only be available with Leptonica,
205 // so there is no raw equivalent.
207  Pix* pix = GetPixRect(); // May have to be reduced to grey.
208  int depth = pixGetDepth(pix);
209  if (depth != 8) {
210  Pix* result = depth < 8 ? pixConvertTo8(pix, false)
211  : pixConvertRGBToLuminance(pix);
212  pixDestroy(&pix);
213  return result;
214  }
215  return pix;
216 }
217 
218 // Otsu threshold the rectangle, taking everything except the image buffer
219 // pointer from the class, to the output Pix.
// imagedata, bytes_per_pixel and bytes_per_line describe the source buffer
// and are forwarded unchanged to OtsuThreshold and ThresholdRectToPix.
// *pix receives whatever ThresholdRectToPix stores in it.
220 void ImageThresholder::OtsuThresholdRectToPix(const unsigned char* imagedata,
221  int bytes_per_pixel,
222  int bytes_per_line,
223  Pix** pix) const {
  // Both pointers are filled in by OtsuThreshold below.
224  int* thresholds;
225  int* hi_values;
  // NOTE(review): listing line 227 (the arguments between bytes_per_line and
  // &thresholds) is absent from this extract - restore it from the real file.
226  OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
228  &thresholds, &hi_values);
229 
230  // Apply the computed thresholds to produce the output Pix.
231  ThresholdRectToPix(imagedata, bytes_per_pixel, bytes_per_line,
232  thresholds, hi_values, pix);
  // The arrays are released here with delete[]; presumably OtsuThreshold
  // allocated them with new[] - confirm against otsuthr.
233  delete [] thresholds;
234  delete [] hi_values;
235 }
236 
237 // Threshold the rectangle, taking everything except the image buffer pointer
238 // from the class, using thresholds/hi_values to the output IMAGE.
239 void ImageThresholder::ThresholdRectToPix(const unsigned char* imagedata,
240  int bytes_per_pixel,
241  int bytes_per_line,
242  const int* thresholds,
243  const int* hi_values,
244  Pix** pix) const {
245  *pix = pixCreate(rect_width_, rect_height_, 1);
246  uinT32* pixdata = pixGetData(*pix);
247  int wpl = pixGetWpl(*pix);
248  const unsigned char* srcdata = imagedata + rect_top_* bytes_per_line +
249  rect_left_ * bytes_per_pixel;
250  for (int y = 0; y < rect_height_; ++y) {
251  const uinT8* linedata = srcdata;
252  uinT32* pixline = pixdata + y * wpl;
253  for (int x = 0; x < rect_width_; ++x, linedata += bytes_per_pixel) {
254  bool white_result = true;
255  for (int ch = 0; ch < bytes_per_pixel; ++ch) {
256  if (hi_values[ch] >= 0 &&
257  (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
258  white_result = false;
259  break;
260  }
261  }
262  if (white_result)
263  CLEAR_DATA_BIT(pixline, x);
264  else
265  SET_DATA_BIT(pixline, x);
266  }
267  srcdata += bytes_per_line;
268  }
269 }
270 
271 // Copy the raw image rectangle, taking all data from the class, to the Pix.
// *pix receives the result: the output of IMAGE::ToPix() when
// image_bytespp_ is below 4, otherwise a newly created 32 bit per pixel Pix.
272 void ImageThresholder::RawRectToPix(Pix** pix) const {
273  if (image_bytespp_ < 4) {
274  // Go via a tesseract image structure (doesn't copy the data)
275  // and use ToPix.
276  IMAGE image;
  // A bytes-per-pixel of 0 denotes a 1 bit per pixel image.
277  int bits_per_pixel = image_bytespp_ * 8;
278  if (image_bytespp_ == 0)
279  bits_per_pixel = 1;
  // The captured height is rect_top_ + rect_height_, not image_height_, so
  // the captured image ends at the bottom edge of the rectangle.
280  image.capture(const_cast<uinT8*>(image_data_),
281  image_width_, rect_top_ + rect_height_, bits_per_pixel);
282  if (IsFullImage()) {
283  *pix = image.ToPix();
284  } else {
285  IMAGE rect;
286  rect.create(rect_width_, rect_height_, bits_per_pixel);
287  // The capture chopped the image off at top+height, so copy
288  // the rectangle with y = 0 to get a rectangle of height
289  // starting at the bottom, since copy_sub_image uses bottom-up coords.
  // NOTE(review): listing line 290 (the start of the call whose trailing
  // arguments follow) is absent from this extract; per the comment above it
  // is presumably a copy_sub_image call - restore it from the real file.
291  &rect, 0, 0, true);
292  *pix = rect.ToPix();
293  }
294  } else {
  // 4 or more bytes per pixel: build a 32 bit Pix directly from the buffer.
295  *pix = pixCreate(rect_width_, rect_height_, 32);
296  uinT32* data = pixGetData(*pix);
297  int wpl = pixGetWpl(*pix);
298  const uinT8* imagedata = image_data_ + rect_top_ * image_bytespl_ +
  // NOTE(review): listing line 299 (the rest of this expression) is absent
  // from this extract - restore it from the real file.
300  for (int y = 0; y < rect_height_; ++y) {
301  const uinT8* linedata = imagedata;
302  uinT32* line = data + y * wpl;
303  for (int x = 0; x < rect_width_; ++x) {
  // Pack four consecutive source bytes into one 32 bit word, with the first
  // byte in the most significant position.
304  line[x] = (linedata[0] << 24) | (linedata[1] << 16) |
305  (linedata[2] << 8) | linedata[3];
306  linedata += 4;
307  }
  // Advance to the next source row.
308  imagedata += image_bytespl_;
309  }
310  }
311 }
312 
313 } // namespace tesseract.
314