# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.

import logging

from dateutil.relativedelta import relativedelta
from psycopg2 import IntegrityError, OperationalError

from odoo import api, fields, models, _lt, _
from odoo.exceptions import AccessError, UserError


_logger = logging.getLogger(__name__)

# Maps an ``extract_status`` code to the lazily translated message exposed
# through ``extract_error_message``. Unknown codes fall back to
# 'error_internal' (see ``_compute_error_message``).
ERROR_MESSAGES = {
    'error_internal': _lt("An error occurred"),
    'error_document_not_found': _lt("The document could not be found"),
    'error_unsupported_format': _lt("Unsupported image format"),
    'error_no_connection': _lt("Server not available. Please retry later"),
    'error_maintenance': _lt("Server is currently under maintenance. Please retry later"),
    'error_password_protected': _lt("Your PDF file is protected by a password. The OCR can't extract data from it"),
    'error_too_many_pages': _lt("Your document contains too many pages"),
    'error_invalid_account_token': _lt(
        "The 'invoice_ocr' IAP account token is invalid. "
        "Please delete it to let Odoo generate a new one or fill it with a valid token."),
    'error_unsupported_size': _lt("The document has been rejected because it is too small"),
    'error_no_page_count': _lt("Invalid PDF (Unable to get page count)"),
    'error_pdf_conversion_to_images': _lt("Invalid PDF (Conversion error)"),
}


class ExtractMixin(models.AbstractModel):
    """ Base model to inherit from to add extract functionality to a model. """
    _name = 'extract.mixin'
    _inherit = 'mail.thread.main.attachment'
    _description = 'Base class to extract data from documents'

    extract_state = fields.Selection([
            ('no_extract_requested', 'No extract requested'),
            ('not_enough_credit', 'Not enough credits'),
            ('error_status', 'An error occurred'),
            ('waiting_extraction', 'Waiting extraction'),
            ('extract_not_ready', 'waiting extraction, but it is not ready'),
            ('waiting_validation', 'Waiting validation'),
            ('to_validate', 'To validate'),
            ('done', 'Completed flow'),
        ],
        'Extract state', default='no_extract_requested', required=True, copy=False)
    extract_status = fields.Char('Extract status', copy=False)
    extract_error_message = fields.Text('Error message', compute='_compute_error_message')
    extract_document_uuid = fields.Char('ID of the request to IAP-OCR', copy=False, readonly=True)
    extract_can_show_send_button = fields.Boolean('Can show the ocr send button', compute='_compute_show_send_button')
    is_in_extractable_state = fields.Boolean(compute='_compute_is_in_extractable_state', store=True)
    extract_state_processed = fields.Boolean(compute='_compute_extract_state_processed', store=True)

    @api.depends('extract_status')
    def _compute_error_message(self):
        """ Translate ``extract_status`` into a user-facing error message.

        The message is empty for the 'success' and 'processing' statuses; any
        status missing from ``ERROR_MESSAGES`` gets the 'error_internal' text.
        """
        for record in self:
            if record.extract_status in ('success', 'processing'):
                record.extract_error_message = ''
            else:
                record.extract_error_message = ERROR_MESSAGES.get(
                    record.extract_status, ERROR_MESSAGES['error_internal']
                )

    @api.depends('extract_state')
    def _compute_extract_state_processed(self):
        """ Set ``extract_state_processed`` when the state is 'waiting_extraction'. """
        for record in self:
            record.extract_state_processed = record.extract_state == 'waiting_extraction'

    @api.depends('is_in_extractable_state', 'extract_state', 'message_main_attachment_id')
    def _compute_show_send_button(self):
        """ Compute whether the send button may be shown.

        All of the following must hold: extraction is enabled
        (``_get_ocr_option_can_extract``), a main attachment is set, no
        extraction has been requested yet and the record is in an extractable
        state.
        """
        for record in self:
            record.extract_can_show_send_button = (
                record._get_ocr_option_can_extract()
                and record.message_main_attachment_id
                and record.extract_state == 'no_extract_requested'
                and record.is_in_extractable_state
            )

    @api.depends()
    def _compute_is_in_extractable_state(self):
        """ Compute the is_in_extractable_state field. This method is meant to be overridden """
        return None

    def _get_iap_account(self):
        """ Return the 'invoice_ocr' IAP account.

        When the record has a company, the lookup is done with that company as
        the only allowed company.
        """
        if self.company_id:
            return self.env['iap.account'].with_context(allowed_company_ids=[self.company_id.id]).get('invoice_ocr')
        else:
            return self.env['iap.account'].get('invoice_ocr')

    @api.model
    def check_all_status(self):
        """ Check the OCR status of every record matching ``_get_to_check_domain``. """
        for record in self.search(self._get_to_check_domain()):
            record._try_to_check_ocr_status()

    @api.model
    def _contact_iap_extract(self, pathinfo, params):
        """ Contact the IAP extract service and return the response. This method is meant to be overridden """
        return {}

    @api.model
    def _cron_validate(self):
        """ Send the validated values of every record to validate to IAP.

        Each record matching ``_get_validation_domain`` is sent individually.
        The call is best effort: an ``AccessError`` is logged and does not
        prevent the record from being marked as 'done'.

        :return: the recordset that has been moved to the 'done' state
        """
        records_to_validate = self.search(self._get_validation_domain())

        for record in records_to_validate:
            try:
                self._contact_iap_extract(
                    'validate',
                    params={
                        'document_token': record.extract_document_uuid,
                        'values': {
                            field: record._get_validation(field) for field in self._get_validation_fields()
                        }
                    }
                )
            except AccessError as e:
                # Keep the flow best effort, but leave a trace instead of
                # dropping the failure silently.
                _logger.warning(
                    "Couldn't send the validation of %s with id %d: %s", record._name, record.id, e,
                )

        records_to_validate.extract_state = 'done'
        return records_to_validate

    @staticmethod
    def _get_ocr_selected_value(ocr_results, feature, default=None):
        """ Return ``ocr_results[feature]['selected_value']['content']``.

        :param dict ocr_results: mapping of feature names to feature results
        :param str feature: key to look up in ``ocr_results``
        :param default: value returned when the 'content' key is missing
            (a missing feature or 'selected_value' key also leads to it)
        """
        return ocr_results.get(feature, {}).get('selected_value', {}).get('content', default)

    def _safe_upload(self):
        """
        This function prevents any exception from being thrown during the upload of a document.
        This is meant to be used for batch uploading where we don't want that an error rollbacks the whole transaction.
        """
        try:
            with self.env.cr.savepoint():
                self.with_company(self.company_id)._upload_to_extract()
        except Exception as e:
            # The error state is not written for psycopg2 IntegrityError /
            # OperationalError; those are only notified and logged.
            if not isinstance(e, (IntegrityError, OperationalError)):
                self.extract_state = 'error_status'
                self.extract_status = 'error_internal'
            self.env['iap.account']._send_error_notification(
                message=self._get_iap_bus_notification_error(),
            )
            _logger.warning("Couldn't upload %s with id %d: %s", self._name, self.id, e)

    def _send_batch_for_digitization(self):
        """ Upload each record of ``self`` through ``_safe_upload``. """
        for rec in self:
            rec._safe_upload()

    def action_send_batch_for_digitization(self):
        """ Send the selected documents for digitization.

        Only documents whose state is 'no_extract_requested',
        'not_enough_credit' or 'error_status' are sent; an info notification is
        emitted when some or all of the documents are skipped.

        :raises UserError: if any selected document is not in an extractable state
        :return: ``None`` when nothing was sent, otherwise a window action
            opening the sent document (form view) or documents (list view)
        """
        if any(not document.is_in_extractable_state for document in self):
            raise UserError(self._get_user_error_invalid_state_message())

        documents_to_send = self.filtered(
            lambda doc: doc.extract_state in ('no_extract_requested', 'not_enough_credit', 'error_status')
        )

        if not documents_to_send:
            self.env['iap.account']._send_status_notification(
                message=_('The selected documents are already digitized'),
                status='info',
            )
            return

        if len(documents_to_send) < len(self):
            self.env['iap.account']._send_status_notification(
                message=_('Some documents were skipped as they were already digitized'),
                status='info',
            )

        documents_to_send._send_batch_for_digitization()

        if len(documents_to_send) == 1:
            return {
                'name': _('Document sent for digitization'),
                'type': 'ir.actions.act_window',
                'res_model': self._name,
                'view_mode': 'form',
                'views': [[False, 'form']],
                'res_id': documents_to_send[0].id,
            }
        return {
            'name': _('Documents sent for digitization'),
            'type': 'ir.actions.act_window',
            'res_model': self._name,
            'view_mode': 'tree,form',
            'target': 'current',
            'domain': [('id', 'in', documents_to_send.ids)],
        }

    def action_manual_send_for_digitization(self):
        """
        Manually trigger the ocr flow for the records.
        This function is meant to be overridden, and called with a title.
        """
        self._upload_to_extract()

    def buy_credits(self):
        """ Return a URL action pointing to the credits page of the 'invoice_ocr' service. """
        url = self.env['iap.account'].get_credits_url(base_url='', service_name='invoice_ocr')
        return {
            'type': 'ir.actions.act_url',
            'url': url,
        }

    def check_ocr_status(self):
        """ Actively check the status of the extraction on the concerned records. """
        records_to_check = self.filtered(lambda a: a.extract_state in ['waiting_extraction', 'extract_not_ready'])

        for record in records_to_check:
            record._check_ocr_status()

        # Use what is left of a budget of 20 records to also refresh other
        # pending records that are not part of ``self``.
        limit = max(0, 20 - len(records_to_check))
        if limit > 0:
            records_to_preupdate = self.search([
                ('extract_state', 'in', ['waiting_extraction', 'extract_not_ready']),
                ('id', 'not in', records_to_check.ids),
                ('is_in_extractable_state', '=', True)], limit=limit)
            for record in records_to_preupdate:
                record._try_to_check_ocr_status()

    def _get_user_infos(self):
        """ Return the language and email of the current user as a dict. """
        user_infos = {
            'user_lang': self.env.user.lang,
            'user_email': self.env.user.email,
        }
        return user_infos

    def _get_validation(self, field):
        """ Return the validation of the record. This method is meant to be overridden """
        return None

    def _upload_to_extract(self):
        """ Contacts IAP extract to parse the first attachment in the chatter.

        Nothing is sent unless extraction is enabled, a main attachment exists
        and the state is 'no_extract_requested', 'not_enough_credit' or
        'error_status'. The record's ``extract_state`` and ``extract_status``
        are updated from the response, and an error notification is sent when
        the record ends up in 'error_status'.

        :return: ``False`` when extraction is disabled, ``None`` otherwise
        """
        self.ensure_one()
        if not self._get_ocr_option_can_extract():
            return False
        attachment = self.message_main_attachment_id
        if attachment and self.extract_state in ['no_extract_requested', 'not_enough_credit', 'error_status']:
            account_token = self._get_iap_account()
            if not account_token.account_token:
                self.extract_state = 'error_status'
                self.extract_status = 'error_invalid_account_token'
                return

            user_infos = self._get_user_infos()
            params = {
                'dbuuid': self.env['ir.config_parameter'].sudo().get_param('database.uuid'),
                'documents': [x.datas.decode('utf-8') for x in attachment],
                'user_infos': user_infos,
                'webhook_url': self._get_webhook_url(),
            }

            try:
                result = self._contact_iap_extract('parse', params=params)
                self.extract_status = result['status']
                if result['status'] == 'success':
                    self.extract_state = 'waiting_extraction'
                    self.extract_document_uuid = result['document_token']
                    # Reset the "no credit" notification flag after a
                    # successful upload.
                    if self.env['ir.config_parameter'].sudo().get_param("iap_extract.already_notified", True):
                        self.env['ir.config_parameter'].sudo().set_param("iap_extract.already_notified", False)
                    self.env['iap.account']._send_success_notification(
                        message=self._get_iap_bus_notification_success(),
                    )
                    self._upload_to_extract_success_callback()
                elif result['status'] == 'error_no_credit':
                    self._send_no_credit_notification()
                    self.extract_state = 'not_enough_credit'
                else:
                    self.extract_state = 'error_status'
                    _logger.warning(
                        'An error occurred during OCR parsing of %s %d. Status: %s',
                        self._name, self.id, self.extract_status,
                    )
            except AccessError:
                self.extract_state = 'error_status'
                self.extract_status = 'error_no_connection'
            if self.extract_state == 'error_status':
                self.env['iap.account']._send_error_notification(
                    message=self._get_iap_bus_notification_error(),
                )

    def _send_no_credit_notification(self):
        """
        Notify about the number of credit.
        In order to avoid to spam people each hour, an ir.config_parameter is set
        """
        self.env['iap.account']._send_no_credit_notification(
            service_name='invoice_ocr',
            title=_("Not enough credits for data extraction"),
        )

        # If we don't find the config parameter, we consider it True, because
        # we don't want to notify if no credits have been bought earlier.
        # NOTE(review): config parameters are presumably stored as strings and
        # set_param(key, False) may remove the key, in which case this lookup
        # would always be truthy and the mail below never sent - confirm.
        already_notified = self.env['ir.config_parameter'].sudo().get_param("iap_extract.already_notified", True)
        if already_notified:
            return
        try:
            mail_template = self.env.ref('iap_extract.iap_extract_no_credit')
        except ValueError:
            # The mail template has not been created by an upgrade of the module
            return

        iap_account = self._get_iap_account()
        if iap_account:
            # The mail goes to the email address of the user with id 2.
            # NOTE(review): presumably the default administrator - confirm.
            res = self.env['res.users'].search_read([('id', '=', 2)], ['email'])
            if res:
                email_values = {
                    'email_to': res[0]['email']
                }
                mail_template.send_mail(iap_account.id, force_send=True, email_values=email_values)
                self.env['ir.config_parameter'].sudo().set_param("iap_extract.already_notified", True)

    def _validate_ocr(self):
        """ Move documents from 'waiting_validation' to 'to_validate'.

        When at least one document is moved, the 'validate' cron is triggered
        at the current time plus the ``ocr_trigger_delta`` context value, in
        minutes (0 by default).
        """
        documents_to_validate = self.filtered(lambda doc: doc.extract_state == 'waiting_validation')
        documents_to_validate.extract_state = 'to_validate'

        if documents_to_validate:
            ocr_trigger_datetime = fields.Datetime.now() + relativedelta(minutes=self.env.context.get('ocr_trigger_delta', 0))
            self._get_cron_ocr('validate')._trigger(at=ocr_trigger_datetime)

    def _check_ocr_status(self, force_write=False):
        """ Contact iap to get the actual status of the ocr request.

        On 'success' the record moves to 'waiting_validation' and is filled
        with the first result; on 'processing' it moves to 'extract_not_ready';
        any other status moves it to 'error_status'.

        :param bool force_write: forwarded to ``_fill_document_with_results``
        """
        self.ensure_one()
        result = self._contact_iap_extract('get_result', params={'document_token': self.extract_document_uuid})
        self.extract_status = result['status']
        if result['status'] == 'success':
            self.extract_state = 'waiting_validation'
            # Set OdooBot as the author of the tracking message
            self._track_set_author(self.env.ref('base.partner_root'))
            ocr_results = result['results'][0]
            self.with_company(self.company_id)._fill_document_with_results(ocr_results, force_write=force_write)
            if 'full_text_annotation' in ocr_results:
                self.message_main_attachment_id.index_content = ocr_results['full_text_annotation']
        elif result['status'] == 'processing':
            self.extract_state = 'extract_not_ready'
        else:
            self.extract_state = 'error_status'

    def _fill_document_with_results(self, ocr_results, force_write=False):
        """ Fill the document with the results of the OCR. This method is meant to be overridden """
        raise NotImplementedError()

    def _get_cron_ocr(self, ocr_action):
        """ Return the cron used to validate the documents, based on the module name.
        ocr_action can be 'validate'.
        """
        module_name = self._get_ocr_module_name()
        return self.env.ref(f'{module_name}.ir_cron_ocr_{ocr_action}')

    def _get_iap_bus_notification_success(self):
        """ Return the message of the notification sent after a successful upload. """
        return _("Document is being digitized")

    def _get_iap_bus_notification_error(self):
        """ Return the message of the notification sent when the upload failed. """
        return _("An error occurred during the upload")

    def _get_ocr_module_name(self):
        """ Returns the name of the module. This method is meant to be overridden """
        return 'iap_extract'

    def _get_ocr_option_can_extract(self):
        """ Returns if we can use the extract capabilities of the module. This method is meant to be overridden """
        return False

    def _get_to_check_domain(self):
        """ Return the domain of the records whose OCR status must be checked. """
        return [('is_in_extractable_state', '=', True),
                ('extract_state', 'in', ['waiting_extraction', 'extract_not_ready'])]

    def _get_validation_domain(self):
        """ Return the domain of the records processed by ``_cron_validate``. """
        return [('extract_state', '=', 'to_validate')]

    def _get_validation_fields(self):
        """ Returns the fields that should be checked to validate the record. This method is meant to be overridden """
        return []

    def _get_webhook_url(self):
        """ Return the webhook url based on the module name. """
        baseurl = self.get_base_url()
        module_name = self._get_ocr_module_name()
        return f'{baseurl}/{module_name}/request_done'

    def _get_user_error_invalid_state_message(self):
        """
        Returns the message of the UserError when the user tries to send a document in an invalid state.
        This method is meant to be overridden.
        """
        return ''

    def _upload_to_extract_success_callback(self):
        """ This method is called when the OCR flow is successful. This method is meant to be overridden """
        return None

    def _try_to_check_ocr_status(self):
        """ Check the OCR status of the record without letting an error propagate.

        The check runs inside a savepoint and the cursor is committed when it
        succeeds; any exception is logged as a warning instead of being raised.
        """
        self.ensure_one()
        try:
            with self.env.cr.savepoint():
                self._check_ocr_status()
            self.env.cr.commit()
        except Exception as e:
            _logger.warning("Couldn't check OCR status of %s with id %d: %s", self._name, self.id, e)