forked from Mapan/odoo17e
399 lines
17 KiB
Python
399 lines
17 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Part of Odoo. See LICENSE file for full copyright and licensing details.
|
|
|
|
import logging
|
|
|
|
from dateutil.relativedelta import relativedelta
|
|
from psycopg2 import IntegrityError, OperationalError
|
|
|
|
from odoo import api, fields, models, _lt, _
|
|
from odoo.exceptions import AccessError, UserError
|
|
|
|
|
|
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)

# Lazily-translated, user-facing messages keyed by the status code stored in
# ``extract_status``. Statuses missing from this mapping are displayed with
# the generic 'error_internal' message.
ERROR_MESSAGES = dict(
    error_internal=_lt("An error occurred"),
    error_document_not_found=_lt("The document could not be found"),
    error_unsupported_format=_lt("Unsupported image format"),
    error_no_connection=_lt("Server not available. Please retry later"),
    error_maintenance=_lt("Server is currently under maintenance. Please retry later"),
    error_password_protected=_lt("Your PDF file is protected by a password. The OCR can't extract data from it"),
    error_too_many_pages=_lt("Your document contains too many pages"),
    error_invalid_account_token=_lt(
        "The 'invoice_ocr' IAP account token is invalid. "
        "Please delete it to let Odoo generate a new one or fill it with a valid token."),
    error_unsupported_size=_lt("The document has been rejected because it is too small"),
    error_no_page_count=_lt("Invalid PDF (Unable to get page count)"),
    error_pdf_conversion_to_images=_lt("Invalid PDF (Conversion error)"),
)
|
|
|
|
|
|
class ExtractMixin(models.AbstractModel):
    """ Base model to inherit from to add extract functionality to a model.

    The mixin stores the progress of an OCR request made for the main
    attachment of a record (``extract_state`` / ``extract_status``) and
    implements the upload, status polling and validation flows. Several
    methods are hooks returning a neutral value here; they are meant to be
    overridden by the inheriting model.
    """
    _name = 'extract.mixin'
    _inherit = 'mail.thread.main.attachment'
    _description = 'Base class to extract data from documents'

    # Step of the extraction flow the record is currently in.
    extract_state = fields.Selection([
            ('no_extract_requested', 'No extract requested'),
            ('not_enough_credit', 'Not enough credits'),
            ('error_status', 'An error occurred'),
            ('waiting_extraction', 'Waiting extraction'),
            ('extract_not_ready', 'waiting extraction, but it is not ready'),
            ('waiting_validation', 'Waiting validation'),
            ('to_validate', 'To validate'),
            ('done', 'Completed flow'),
        ],
        'Extract state', default='no_extract_requested', required=True, copy=False)
    # Raw status string returned by the extract service (or set locally on failure).
    extract_status = fields.Char('Extract status', copy=False)
    # Human-readable message derived from ``extract_status``.
    extract_error_message = fields.Text('Error message', compute='_compute_error_message')
    # Token returned by the service for the uploaded document.
    extract_document_uuid = fields.Char('ID of the request to IAP-OCR', copy=False, readonly=True)
    extract_can_show_send_button = fields.Boolean('Can show the ocr send button', compute='_compute_show_send_button')
    is_in_extractable_state = fields.Boolean(compute='_compute_is_in_extractable_state', store=True)
    extract_state_processed = fields.Boolean(compute='_compute_extract_state_processed', store=True)

    @api.depends('extract_status')
    def _compute_error_message(self):
        """ Translate ``extract_status`` into a message for the user.

        The message is empty for the 'success' and 'processing' statuses.
        Any status that is not a key of ``ERROR_MESSAGES`` gets the generic
        'error_internal' message.
        """
        for record in self:
            if record.extract_status in ('success', 'processing'):
                record.extract_error_message = ''
            else:
                record.extract_error_message = ERROR_MESSAGES.get(
                    record.extract_status, ERROR_MESSAGES['error_internal']
                )

    @api.depends('extract_state')
    def _compute_extract_state_processed(self):
        """ Flag the records whose ``extract_state`` is 'waiting_extraction'. """
        for record in self:
            record.extract_state_processed = record.extract_state == 'waiting_extraction'

    @api.depends('is_in_extractable_state', 'extract_state', 'message_main_attachment_id')
    def _compute_show_send_button(self):
        """ Decide whether the "send for digitization" button can be shown.

        All of the following must hold: extraction is enabled, the record has
        a main attachment, no extraction has been requested yet and the record
        is in an extractable state.
        """
        for record in self:
            # The ``and`` chain may evaluate to a recordset; make the value
            # stored in the Boolean field an explicit bool.
            record.extract_can_show_send_button = bool(
                record._get_ocr_option_can_extract()
                and record.message_main_attachment_id
                and record.extract_state == 'no_extract_requested'
                and record.is_in_extractable_state
            )

    @api.depends()
    def _compute_is_in_extractable_state(self):
        """ Compute the is_in_extractable_state field. This method is meant to be overridden """
        # The base implementation deliberately assigns nothing.
        return None

    def _get_iap_account(self):
        """ Return the 'invoice_ocr' IAP account.

        When the record has a company, the lookup is done with that company
        as the only allowed company in the context.
        """
        iap_account_model = self.env['iap.account']
        if self.company_id:
            iap_account_model = iap_account_model.with_context(allowed_company_ids=[self.company_id.id])
        return iap_account_model.get('invoice_ocr')

    @api.model
    def check_all_status(self):
        """ Poll the extraction status of every record matching ``_get_to_check_domain``. """
        for record in self.search(self._get_to_check_domain()):
            record._try_to_check_ocr_status()

    @api.model
    def _contact_iap_extract(self, pathinfo, params):
        """ Contact the IAP extract service and return the response. This method is meant to be overridden """
        return {}

    @api.model
    def _cron_validate(self):
        """ Send the validated values of the 'to_validate' records to the service.

        Sending is best effort: an ``AccessError`` raised while contacting the
        service is logged and the record is marked as done nonetheless.

        :return: the records that have been moved to the 'done' state
        """
        records_to_validate = self.search(self._get_validation_domain())
        # The list of fields does not depend on the record: fetch it once.
        validation_fields = self._get_validation_fields()

        for record in records_to_validate:
            try:
                self._contact_iap_extract(
                    'validate',
                    params={
                        'document_token': record.extract_document_uuid,
                        'values': {
                            field: record._get_validation(field) for field in validation_fields
                        }
                    }
                )
            except AccessError as e:
                # Do not fail the whole batch, but leave a trace of the failure.
                _logger.warning(
                    "Couldn't send the validation of %s with id %s: %s",
                    record._name, record.id, e,
                )

        records_to_validate.extract_state = 'done'
        return records_to_validate

    @staticmethod
    def _get_ocr_selected_value(ocr_results, feature, default=None):
        """ Return ``ocr_results[feature]['selected_value']['content']``.

        :param dict ocr_results: results returned by the extract service
        :param str feature: key of the feature to read
        :param default: value returned when the content cannot be found
        :return: the content, or ``default`` when the feature, its
            'selected_value' or the 'content' key is missing. A feature or
            'selected_value' that is present but empty/None is treated as
            missing instead of raising.
        """
        feature_result = ocr_results.get(feature) or {}
        selected_value = feature_result.get('selected_value') or {}
        return selected_value.get('content', default)

    def _safe_upload(self):
        """
        Upload the document inside a savepoint and swallow any exception.
        This is meant to be used for batch uploading where we don't want that an error rollbacks the whole transaction.

        For every exception other than psycopg2's IntegrityError and
        OperationalError, the record is flagged in error and an error
        notification is sent. The failure is logged in all cases.
        """
        try:
            with self.env.cr.savepoint():
                self.with_company(self.company_id)._upload_to_extract()
        except Exception as e:
            if not isinstance(e, (IntegrityError, OperationalError)):
                self.extract_state = 'error_status'
                self.extract_status = 'error_internal'
                self.env['iap.account']._send_error_notification(
                    message=self._get_iap_bus_notification_error(),
                )
            _logger.warning("Couldn't upload %s with id %d: %s", self._name, self.id, e)

    def _send_batch_for_digitization(self):
        """ Upload each record of ``self`` independently through ``_safe_upload``. """
        for rec in self:
            rec._safe_upload()

    def action_send_batch_for_digitization(self):
        """ Send the selected documents for digitization and open them.

        Only the documents whose state is 'no_extract_requested',
        'not_enough_credit' or 'error_status' are sent; an info notification
        is sent when some or all documents are skipped.

        :raises UserError: if a document is not in an extractable state
        :return: a window action on the sent document(s) (form view for a
            single document, list otherwise), or None when nothing was sent
        """
        if any(not document.is_in_extractable_state for document in self):
            raise UserError(self._get_user_error_invalid_state_message())

        documents_to_send = self.filtered(
            lambda doc: doc.extract_state in ('no_extract_requested', 'not_enough_credit', 'error_status')
        )

        if not documents_to_send:
            self.env['iap.account']._send_status_notification(
                message=_('The selected documents are already digitized'),
                status='info',
            )
            return

        if len(documents_to_send) < len(self):
            self.env['iap.account']._send_status_notification(
                message=_('Some documents were skipped as they were already digitized'),
                status='info',
            )

        documents_to_send._send_batch_for_digitization()

        if len(documents_to_send) == 1:
            return {
                'name': _('Document sent for digitization'),
                'type': 'ir.actions.act_window',
                'res_model': self._name,
                'view_mode': 'form',
                'views': [[False, 'form']],
                'res_id': documents_to_send[0].id,
            }
        return {
            'name': _('Documents sent for digitization'),
            'type': 'ir.actions.act_window',
            'res_model': self._name,
            'view_mode': 'tree,form',
            'target': 'current',
            'domain': [('id', 'in', documents_to_send.ids)],
        }

    def action_manual_send_for_digitization(self):
        """ Manually trigger the ocr flow for the records.
        This function is meant to be overridden, and called with a title.
        """
        self._upload_to_extract()

    def buy_credits(self):
        """ Return a URL action pointing to the 'invoice_ocr' credits page. """
        url = self.env['iap.account'].get_credits_url(base_url='', service_name='invoice_ocr')
        return {
            'type': 'ir.actions.act_url',
            'url': url,
        }

    def check_ocr_status(self):
        """ Actively check the status of the extraction on the concerned records. """

        records_to_check = self.filtered(lambda a: a.extract_state in ['waiting_extraction', 'extract_not_ready'])

        for record in records_to_check:
            record._check_ocr_status()

        # Use what remains of a budget of 20 checks to refresh other pending
        # records that are in an extractable state.
        limit = max(0, 20 - len(records_to_check))
        if limit > 0:
            records_to_preupdate = self.search([
                ('extract_state', 'in', ['waiting_extraction', 'extract_not_ready']),
                ('id', 'not in', records_to_check.ids),
                ('is_in_extractable_state', '=', True)], limit=limit)
            for record in records_to_preupdate:
                record._try_to_check_ocr_status()

    def _get_user_infos(self):
        """ Return the language and email of the current user, as sent to the service. """
        return {
            'user_lang': self.env.user.lang,
            'user_email': self.env.user.email,
        }

    def _get_validation(self, field):
        """ Return the validation of the record. This method is meant to be overridden """
        return None

    def _upload_to_extract(self):
        """ Contacts IAP extract to parse the first attachment in the chatter.

        Nothing is sent unless extraction is enabled, the record has a main
        attachment and its state is 'no_extract_requested',
        'not_enough_credit' or 'error_status'. ``extract_state`` and
        ``extract_status`` are updated according to the answer of the service.

        :return: False when extraction is disabled, None otherwise
        """
        self.ensure_one()
        if not self._get_ocr_option_can_extract():
            return False
        attachment = self.message_main_attachment_id
        if attachment and self.extract_state in ['no_extract_requested', 'not_enough_credit', 'error_status']:
            iap_account = self._get_iap_account()

            if not iap_account.account_token:
                self.extract_state = 'error_status'
                self.extract_status = 'error_invalid_account_token'
                return

            config_parameters = self.env['ir.config_parameter'].sudo()
            params = {
                'dbuuid': config_parameters.get_param('database.uuid'),
                'documents': [x.datas.decode('utf-8') for x in attachment],
                'user_infos': self._get_user_infos(),
                'webhook_url': self._get_webhook_url(),
            }
            try:
                result = self._contact_iap_extract('parse', params=params)
                status = result['status']
                self.extract_status = status
                if status == 'success':
                    self.extract_state = 'waiting_extraction'
                    self.extract_document_uuid = result['document_token']
                    # NOTE(review): set_param(key, False) is believed to delete
                    # the parameter instead of storing a falsy value. If so,
                    # every get_param("iap_extract.already_notified", True) in
                    # this class is truthy and the no-credit e-mail is never
                    # sent -- confirm against ir.config_parameter.
                    if config_parameters.get_param("iap_extract.already_notified", True):
                        config_parameters.set_param("iap_extract.already_notified", False)
                    self.env['iap.account']._send_success_notification(
                        message=self._get_iap_bus_notification_success(),
                    )
                    self._upload_to_extract_success_callback()
                elif status == 'error_no_credit':
                    self._send_no_credit_notification()
                    self.extract_state = 'not_enough_credit'
                else:
                    self.extract_state = 'error_status'
                    _logger.warning(
                        'An error occurred during OCR parsing of %s %d. Status: %s',
                        self._name, self.id, self.extract_status,
                    )
            except AccessError:
                self.extract_state = 'error_status'
                self.extract_status = 'error_no_connection'
            if self.extract_state == 'error_status':
                self.env['iap.account']._send_error_notification(
                    message=self._get_iap_bus_notification_error(),
                )

    def _send_no_credit_notification(self):
        """
        Notify about the number of credit.
        In order to avoid to spam people each hour, an ir.config_parameter is set
        """

        self.env['iap.account']._send_no_credit_notification(
            service_name='invoice_ocr',
            title=_("Not enough credits for data extraction"),
        )

        # If we don't find the config parameter, we consider it True, because
        # we don't want to notify if no credits has been bought earlier.
        # NOTE(review): get_param presumably returns the stored string, so any
        # stored value would be truthy here -- confirm that this guard can
        # actually be False.
        already_notified = self.env['ir.config_parameter'].sudo().get_param("iap_extract.already_notified", True)
        if already_notified:
            return
        try:
            mail_template = self.env.ref('iap_extract.iap_extract_no_credit')
        except ValueError:
            # The mail template has not been created by an upgrade of the module.
            return
        iap_account = self._get_iap_account()
        if iap_account:
            # The recipient is the user with the hard-coded database id 2
            # (presumably the default administrator -- verify).
            res = self.env['res.users'].search_read([('id', '=', 2)], ['email'])
            if res:
                email_values = {
                    'email_to': res[0]['email']
                }
                mail_template.send_mail(iap_account.id, force_send=True, email_values=email_values)
                self.env['ir.config_parameter'].sudo().set_param("iap_extract.already_notified", True)

    def _validate_ocr(self):
        """ Move the 'waiting_validation' documents to 'to_validate' and trigger the validation cron.

        The cron is triggered ``ocr_trigger_delta`` minutes from now, a value
        read from the context (0 when absent).
        """
        documents_to_validate = self.filtered(lambda doc: doc.extract_state == 'waiting_validation')
        documents_to_validate.extract_state = 'to_validate'

        if documents_to_validate:
            ocr_trigger_datetime = fields.Datetime.now() + relativedelta(minutes=self.env.context.get('ocr_trigger_delta', 0))
            self._get_cron_ocr('validate')._trigger(at=ocr_trigger_datetime)

    def _check_ocr_status(self, force_write=False):
        """ Contact iap to get the actual status of the ocr request.

        On success the document is filled with the first result and, when the
        service provides it, the full text is stored as the index content of
        the main attachment.

        :param bool force_write: forwarded to ``_fill_document_with_results``
        """
        self.ensure_one()
        result = self._contact_iap_extract('get_result', params={'document_token': self.extract_document_uuid})
        status = result['status']
        self.extract_status = status
        if status == 'success':
            self.extract_state = 'waiting_validation'
            # Set OdooBot as the author of the tracking message
            self._track_set_author(self.env.ref('base.partner_root'))
            ocr_results = result['results'][0]
            self.with_company(self.company_id)._fill_document_with_results(ocr_results, force_write=force_write)
            if 'full_text_annotation' in ocr_results:
                self.message_main_attachment_id.index_content = ocr_results['full_text_annotation']

        elif status == 'processing':
            self.extract_state = 'extract_not_ready'
        else:
            self.extract_state = 'error_status'

    def _fill_document_with_results(self, ocr_results, force_write=False):
        """ Fill the document with the results of the OCR. This method is meant to be overridden """
        raise NotImplementedError()

    def _get_cron_ocr(self, ocr_action):
        """ Return the cron used to validate the documents, based on the module name.
        ocr_action can be 'validate'.
        """
        module_name = self._get_ocr_module_name()
        return self.env.ref(f'{module_name}.ir_cron_ocr_{ocr_action}')

    def _get_iap_bus_notification_success(self):
        """ Return the message of the notification sent after a successful upload. """
        return _("Document is being digitized")

    def _get_iap_bus_notification_error(self):
        """ Return the message of the notification sent after a failed upload. """
        return _("An error occurred during the upload")

    def _get_ocr_module_name(self):
        """ Returns the name of the module. This method is meant to be overridden """
        return 'iap_extract'

    def _get_ocr_option_can_extract(self):
        """ Returns if we can use the extract capabilities of the module. This method is meant to be overridden """
        return False

    def _get_to_check_domain(self):
        """ Return the domain of the records whose extraction status must be polled. """
        return [('is_in_extractable_state', '=', True),
                ('extract_state', 'in', ['waiting_extraction', 'extract_not_ready'])]

    def _get_validation_domain(self):
        """ Return the domain of the records whose validation must be sent. """
        return [('extract_state', '=', 'to_validate')]

    def _get_validation_fields(self):
        """ Returns the fields that should be checked to validate the record. This method is meant to be overridden """
        return []

    def _get_webhook_url(self):
        """ Return the webhook url based on the module name. """
        baseurl = self.get_base_url()
        module_name = self._get_ocr_module_name()
        return f'{baseurl}/{module_name}/request_done'

    def _get_user_error_invalid_state_message(self):
        """
        Returns the message of the UserError when the user tries to send a document in an invalid state.
        This method is meant to be overridden.
        """
        return ''

    def _upload_to_extract_success_callback(self):
        """ This method is called when the OCR flow is successful. This method is meant to be overridden """
        return None

    def _try_to_check_ocr_status(self):
        """ Check the extraction status inside a savepoint, then commit.

        Any exception is logged instead of being propagated, so that a
        failure on one record does not prevent the caller from processing
        the next ones.
        """
        self.ensure_one()
        try:
            with self.env.cr.savepoint():
                self._check_ocr_status()
            self.env.cr.commit()
        except Exception as e:
            _logger.warning("Couldn't check OCR status of %s with id %d: %s", self._name, self.id, e)
|