/* GNU Ocrad - Optical Character Recognition program Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include "common.h" #include "rectangle.h" #include "ucs.h" #include "bitmap.h" #include "blob.h" #include "character.h" Character::Character( const Character & c ) : Rectangle( c ), blobpv( c.blobpv ), gv( c.gv ) { for( unsigned i = 0; i < blobpv.size(); ++i ) blobpv[i] = new Blob( *c.blobpv[i] ); } Character & Character::operator=( const Character & c ) { if( this != &c ) { Rectangle::operator=( c ); for( unsigned i = 0; i < blobpv.size(); ++i ) delete blobpv[i]; blobpv = c.blobpv; for( unsigned i = 0; i < blobpv.size(); ++i ) blobpv[i] = new Blob( *c.blobpv[i] ); gv = c.gv; } return *this; } Character::~Character() { for( unsigned i = 0; i < blobpv.size(); ++i ) delete blobpv[i]; } // Return the filled area of the main blobs only (no recursive) // int Character::area() const { int a = 0; for( int i = 0; i < blobs(); ++i ) a += blobpv[i]->area(); return a; } const Blob & Character::blob( const int i ) const { if( i < 0 || i >= blobs() ) Ocrad::internal_error( "const blob, index out of bounds" ); return *blobpv[i]; } Blob & Character::blob( const int i ) { if( i < 0 || i >= blobs() ) Ocrad::internal_error( "blob, index out of bounds" ); return *blobpv[i]; } Blob & Character::main_blob() { int imax = 0; for( int i = 1; i < blobs(); ++i ) if( blobpv[i]->size() > blobpv[imax]->size() ) imax = i; return *blobpv[imax]; } void Character::shift_blobp( Blob * const p ) { add_rectangle( *p ); int i = blobs() - 1; for( ; i >= 0; --i ) { Blob & bi = *blobpv[i]; if( p->vcenter() > bi.vcenter() ) break; if( p->vcenter() == bi.vcenter() && p->hcenter() >= bi.hcenter() ) break; } blobpv.insert( blobpv.begin() + ( i + 1 ), p ); } void Character::insert_guess( const int i, const int code, const int value ) { if( i < 0 || i > guesses() ) Ocrad::internal_error( "insert_guess, index out of bounds" ); gv.insert( gv.begin() + i, Guess( code, value ) ); } void Character::delete_guess( const int i ) { if( i < 0 || i >= guesses() ) Ocrad::internal_error( "delete_guess, index out of bounds" ); gv.erase( gv.begin() + i ); } bool Character::set_merged_guess( const int code1, const int right1, const int code2, const int blob_index ) { if( blob_index < 0 || blob_index >= blobs() ) return false; const Blob & b = *blobpv[blob_index]; if( b.left() <= right1 && right1 < b.right() ) { only_guess( -(blob_index + 1), left() ); add_guess( code1, right1 ); add_guess( code2, right() ); return true; } return false; } void Character::swap_guesses( const int i, const int j ) { if( i < 0 || i >= guesses() || j < 0 || j >= guesses() ) Ocrad::internal_error( "swap_guesses, index out of bounds" ); const int code = gv[i].code; gv[i].code = gv[j].code; gv[j].code = code; } const Character::Guess & Character::guess( const int i ) const { if( i < 0 || i >= guesses() ) Ocrad::internal_error( "guess, index out of bounds" ); return gv[i]; } bool Character::maybe( const int code ) const { for( int i = 0; i < guesses(); ++i ) if( code == gv[i].code ) return true; return false; } /* bool Character::maybe_digit() const { for( int i = 0; i < guesses(); ++i ) if( UCS::isdigit( gv[i].code ) ) return true; return false; } bool Character::maybe_letter() const { for( int i = 0; i < guesses(); ++i ) if( UCS::isalpha( gv[i].code ) ) return true; return false; } */ void Character::join( Character & c ) { for( int i = 0; i < c.blobs(); ++i ) shift_blobp( c.blobpv[i] ); c.blobpv.clear(); } unsigned char Character::byte_result() const { if( guesses() ) { const unsigned char ch = UCS::map_to_byte( gv[0].code ); if( ch ) return ch; } return '_'; } const char * Character::utf8_result() const { if( guesses() ) { const char * s = UCS::ucs_to_utf8( gv[0].code ); if( *s ) return s; } return "_"; } void Character::print( const Control & control ) const { if( guesses() ) { if( !control.utf8 ) { unsigned char ch = UCS::map_to_byte( gv[0].code ); if( ch ) std::putc( ch, control.outfile ); } else if( gv[0].code ) std::fputs( UCS::ucs_to_utf8( gv[0].code ), control.outfile ); } else std::putc( '_', control.outfile ); } void Character::dprint( const Control & control, const Rectangle & charbox, const bool graph, const bool recursive ) const { if( graph || recursive ) std::fprintf( control.outfile, "%d guesses ", guesses() ); for( int i = 0; i < guesses(); ++i ) { if( !control.utf8 ) { unsigned char ch = UCS::map_to_byte( gv[i].code ); if( ch ) std::fprintf( control.outfile, "guess '%c', confidence %d ", ch, gv[i].value ); } else std::fprintf( control.outfile, "guess '%s', confidence %d ", UCS::ucs_to_utf8( gv[i].code ), gv[i].value ); if( !graph && !recursive ) break; } std::fputs( "\n", control.outfile ); if( graph ) { std::fprintf( control.outfile, "left = %d, top = %d, right = %d, bottom = %d\n", left(), top(), right(), bottom() ); std::fprintf( control.outfile, "width = %d, height = %d, hcenter = %d, vcenter = %d, black area = %d%%\n\n", width(), height(), hcenter(), vcenter(), ( area() * 100 ) / size() ); const int minrow = std::min( top(), charbox.top() ); const int maxrow = std::max( bottom(), charbox.bottom() ); for( int row = minrow; row <= maxrow; ++row ) { bool istop = ( row == top() ); bool isvc = ( row == vcenter() ); bool isbot = ( row == bottom() ); bool iscbtop = ( row == charbox.top() ); bool iscbvc = ( row == charbox.vcenter() ); bool iscbbot = ( row == charbox.bottom() ); bool ish1top = false, ish1bot = false, ish2top = false, ish2bot = false; if( blobs() == 1 && blobpv[0]->holes() ) { const Blob & b = *blobpv[0]; ish1top = ( row == b.hole(0).top() ); ish1bot = ( row == b.hole(0).bottom() ); if( b.holes() > 1 ) { ish2top = ( row == b.hole(1).top() ); ish2bot = ( row == b.hole(1).bottom() ); } } for( int col = left(); col <= right(); ++col ) { char ch = ( isvc && col == hcenter() ) ? '+' : '.'; for( int i = 0; i < blobs(); ++i ) { int id = blobpv[i]->id( row, col ); if( id != 0 ) { if( id > 0 ) ch = (ch == '+') ? 'C' : 'O'; else ch = (ch == '+') ? '=' : '-'; break; } } std::fprintf( control.outfile, " %c", ch ); } if( istop ) std::fprintf( control.outfile, " top(%d)", row ); if( isvc ) std::fprintf( control.outfile, " vcenter(%d)", row ); if( isbot ) std::fprintf( control.outfile, " bottom(%d)", row ); if( iscbtop ) std::fprintf( control.outfile, " box.top(%d)", row ); if( iscbvc ) std::fprintf( control.outfile, " box.vcenter(%d)", row ); if( iscbbot ) std::fprintf( control.outfile, " box.bottom(%d)", row ); if( ish1top ) std::fprintf( control.outfile, " h1.top(%d)", row ); if( ish1bot ) std::fprintf( control.outfile, " h1.bottom(%d)", row ); if( ish2top ) std::fprintf( control.outfile, " h2.top(%d)", row ); if( ish2bot ) std::fprintf( control.outfile, " h2.bottom(%d)", row ); std::fputs( "\n", control.outfile ); } std::fputs( "\n\n", control.outfile ); } } void Character::xprint( const Control & control ) const { std::fprintf( control.exportfile, "%3d %3d %2d %2d; %d", left(), top(), width(), height(), guesses() ); for( int i = 0; i < guesses(); ++i ) if( !control.utf8 ) { unsigned char ch = UCS::map_to_byte( gv[i].code ); if( !ch ) ch = '_'; std::fprintf( control.exportfile, ", '%c'%d", ch, gv[i].value ); } else std::fprintf( control.exportfile, ", '%s'%d", UCS::ucs_to_utf8( gv[i].code ), gv[i].value ); std::fputs( "\n", control.exportfile ); } void Character::apply_filter( const Filter & filter ) { if( filter.type() == Filter::none ) return; const int code = guesses() ? gv[0].code : 0; bool remove = false; switch( filter.type() ) { case Filter::none: // only for completeness break; case Filter::letters_only: remove = true; case Filter::letters: if( !UCS::isalpha( code ) && !UCS::isspace( code ) ) { for( int i = 1; i < guesses(); ++i ) if( UCS::isalpha( gv[i].code ) ) { swap_guesses( 0, i ); break; } if( guesses() && !UCS::isalpha( gv[0].code ) ) gv[0].code = UCS::to_nearest_letter( gv[0].code ); if( remove && ( !guesses() || !UCS::isalpha( gv[0].code ) ) ) only_guess( 0, 0 ); } break; case Filter::numbers_only: remove = true; case Filter::numbers: if( !UCS::isdigit( code ) && !UCS::isspace( code ) ) { for( int i = 1; i < guesses(); ++i ) if( UCS::isdigit( gv[i].code ) ) { swap_guesses( 0, i ); break; } if( guesses() && !UCS::isdigit( gv[0].code ) ) gv[0].code = UCS::to_nearest_digit( gv[0].code ); if( remove && ( !guesses() || !UCS::isdigit( gv[0].code ) ) ) only_guess( 0, 0 ); } break; } }