diff --git a/ocrd_tesserocr/crop.py b/ocrd_tesserocr/crop.py index ca66f8e..47ccabc 100644 --- a/ocrd_tesserocr/crop.py +++ b/ocrd_tesserocr/crop.py @@ -140,17 +140,17 @@ def estimate_bounds(self, page, page_image, tessapi, zoom=1.0): bin_bbox = image.getbbox() if not bin_bbox: # this does happen! - self.logger.info("Ignoring region '%s' because its binarization is empty", ID) + self.logger.warning("Ignoring region '%s' because its binarization is empty", ID) continue width = bin_bbox[2]-bin_bbox[0] if width < 25 / zoom: # we must be conservative here: page numbers are tiny regions, too! - self.logger.info("Ignoring region '%s' because its width is too small (%d)", ID, width) + self.logger.warning("Ignoring region '%s' because its width is too small (%d)", ID, width) continue height = bin_bbox[3]-bin_bbox[1] if height < 25 / zoom: # we must be conservative here: page numbers are tiny regions, too! - self.logger.debug("Ignoring region '%s' because its height is too small (%d)", ID, height) + self.logger.warning("Ignoring region '%s' because its height is too small (%d)", ID, height) continue all_left = min(all_left, left) all_top = min(all_top, top) diff --git a/ocrd_tesserocr/recognize.py b/ocrd_tesserocr/recognize.py index 2cb3f44..d4299a6 100644 --- a/ocrd_tesserocr/recognize.py +++ b/ocrd_tesserocr/recognize.py @@ -534,7 +534,7 @@ def _process_regions_in_page(self, result_it, page, page_coords, mapping, dpi): coords = CoordsType(points=points) # plausibilise candidate if polygon2 is None: - self.logger.info('Ignoring extant region: %s', points) + self.logger.warning('Ignoring extant region: %s', points) continue block_type = it.BlockType() if block_type in [ @@ -548,11 +548,11 @@ def _process_regions_in_page(self, result_it, page, page_coords, mapping, dpi): PT.TABLE] and ( xywh['w'] < 20 / 300.0*(dpi or 300) or xywh['h'] < 10 / 300.0*(dpi or 300)): - self.logger.info('Ignoring too small region: %s', points) + self.logger.warning('Ignoring too small region: %s', points) continue region_image_bin = it.GetBinaryImage(RIL.BLOCK) if not region_image_bin or not region_image_bin.getbbox(): - self.logger.info('Ignoring binary-empty region: %s', points) + self.logger.warning('Ignoring binary-empty region: %s', points) continue # # keep and annotate new region @@ -560,8 +560,7 @@ def _process_regions_in_page(self, result_it, page, page_coords, mapping, dpi): # # region type switch block_type = it.BlockType() - self.logger.info("Detected region '%s': %s (%s)", - ID, points, membername(PT, block_type)) + self.logger.info("Detected region '%s' (%s)", ID, membername(PT, block_type)) if block_type in [PT.FLOWING_TEXT, PT.HEADING_TEXT, PT.PULLOUT_TEXT, @@ -671,10 +670,10 @@ def _process_cells_in_table(self, result_it, region, rogroup, page_coords, mappi points = points_from_polygon(polygon) coords = CoordsType(points=points) if polygon2 is None: - self.logger.info('Ignoring extant cell: %s', points) + self.logger.warning('Ignoring extant cell: %s', points) continue ID = region.id + "_cell%04d" % index - self.logger.info("Detected cell '%s': %s", ID, points) + self.logger.info("Detected cell '%s'", ID) cell = TextRegionType(id=ID, Coords=coords) region.add_TextRegion(cell) self._add_orientation(it, cell, page_coords) @@ -720,10 +719,10 @@ def _process_lines_in_region(self, result_it, region, page_coords, mapping, pare points = points_from_polygon(polygon) coords = CoordsType(points=points) if polygon2 is None: - self.logger.info('Ignoring extant line: %s', points) + self.logger.warning('Ignoring extant line: %s', points) continue ID = region.id + "_line%04d" % index - self.logger.info("Detected line '%s': %s", ID, points) + self.logger.info("Detected line '%s'", ID) line = TextLineType(id=ID, Coords=coords) region.add_TextLine(line) if self.parameter['textequiv_level'] != 'line': @@ -755,7 +754,7 @@ def _process_words_in_line(self, result_it, line, coords, mapping): polygon = polygon2 points = points_from_polygon(polygon) if polygon2 is None: - self.logger.info('Ignoring extant word: %s', points) + self.logger.warning('Ignoring extant word: %s', points) continue ID = line.id + "_word%04d" % index self.logger.debug("Detected word '%s': %s", ID, points) @@ -779,7 +778,7 @@ def _process_glyphs_in_word(self, result_it, word, coords, mapping): polygon = polygon2 points = points_from_polygon(polygon) if polygon2 is None: - self.logger.info('Ignoring extant glyph: %s', points) + self.logger.warning('Ignoring extant glyph: %s', points) continue ID = word.id + '_glyph%04d' % index #self.logger.debug("Detected glyph '%s': %s", ID, points) @@ -1142,7 +1141,7 @@ def _add_orientation(self, result_it, region, coords): # defined as 'how many radians does one have to rotate the block anti-clockwise' # i.e. positive amount to be applied counter-clockwise for deskewing: deskew_angle *= 180 / math.pi - self.logger.debug('orientation/deskewing for %s: %s / %s / %s / %.3f°', region.id, + self.logger.info('orientation/deskewing for %s: %s / %s / %s / %.3f°', region.id, membername(Orientation, orientation), membername(WritingDirection, writing_direction), membername(TextlineOrder, textline_order),