1
0
forked from Mapan/odoo17e
odoo17e-kedaikipas58/addons/iap_extract/models/extract_mixin.py
2024-12-10 09:04:09 +07:00

399 lines
17 KiB
Python

# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.
import logging
from dateutil.relativedelta import relativedelta
from psycopg2 import IntegrityError, OperationalError
from odoo import api, fields, models, _lt, _
from odoo.exceptions import AccessError, UserError
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
# User-facing messages keyed by the status code stored in `extract_status`.
# `_compute_error_message` looks the status up here and falls back to the
# 'error_internal' entry for any status that has no entry of its own.
# Every message is wrapped in `_lt` and kept as a plain literal.
ERROR_MESSAGES = {
    'error_internal': _lt("An error occurred"),
    'error_document_not_found': _lt("The document could not be found"),
    'error_unsupported_format': _lt("Unsupported image format"),
    'error_no_connection': _lt("Server not available. Please retry later"),
    'error_maintenance': _lt("Server is currently under maintenance. Please retry later"),
    'error_password_protected': _lt("Your PDF file is protected by a password. The OCR can't extract data from it"),
    'error_too_many_pages': _lt("Your document contains too many pages"),
    'error_invalid_account_token': _lt(
        "The 'invoice_ocr' IAP account token is invalid. "
        "Please delete it to let Odoo generate a new one or fill it with a valid token."),
    'error_unsupported_size': _lt("The document has been rejected because it is too small"),
    'error_no_page_count': _lt("Invalid PDF (Unable to get page count)"),
    'error_pdf_conversion_to_images': _lt("Invalid PDF (Conversion error)"),
}
class ExtractMixin(models.AbstractModel):
    """ Base model to inherit from to add extract functionality to a model.

    The mixin tracks where a record stands in the OCR flow (`extract_state`),
    the last status returned by the extract service (`extract_status`) and
    the token of the request (`extract_document_uuid`). Several hooks are
    meant to be overridden by the inheriting model; they are flagged as such
    in their own docstring.
    """
    _name = 'extract.mixin'
    _inherit = 'mail.thread.main.attachment'
    _description = 'Base class to extract data from documents'

    # Position of the record in the extraction flow.
    extract_state = fields.Selection(
        [
            ('no_extract_requested', 'No extract requested'),
            ('not_enough_credit', 'Not enough credits'),
            ('error_status', 'An error occurred'),
            ('waiting_extraction', 'Waiting extraction'),
            ('extract_not_ready', 'waiting extraction, but it is not ready'),
            ('waiting_validation', 'Waiting validation'),
            ('to_validate', 'To validate'),
            ('done', 'Completed flow'),
        ],
        'Extract state', default='no_extract_requested', required=True, copy=False,
    )
    # Last status string received from the extract service, or one of the
    # local 'error_*' codes set by this mixin.
    extract_status = fields.Char('Extract status', copy=False)
    # Message derived from `extract_status` through ERROR_MESSAGES.
    extract_error_message = fields.Text('Error message', compute='_compute_error_message')
    # Token returned by the 'parse' request; sent back on later requests.
    extract_document_uuid = fields.Char('ID of the request to IAP-OCR', copy=False, readonly=True)
    extract_can_show_send_button = fields.Boolean('Can show the ocr send button', compute='_compute_show_send_button')
    is_in_extractable_state = fields.Boolean(compute='_compute_is_in_extractable_state', store=True)
    extract_state_processed = fields.Boolean(compute='_compute_extract_state_processed', store=True)

    @api.depends('extract_status')
    def _compute_error_message(self):
        """ Map `extract_status` to a message; empty for 'success' and 'processing'. """
        for record in self:
            if record.extract_status in ('success', 'processing'):
                record.extract_error_message = ''
            else:
                # Unknown statuses fall back to the generic internal error.
                record.extract_error_message = ERROR_MESSAGES.get(
                    record.extract_status, ERROR_MESSAGES['error_internal']
                )

    @api.depends('extract_state')
    def _compute_extract_state_processed(self):
        """ Flag the records whose `extract_state` is 'waiting_extraction'. """
        for record in self:
            record.extract_state_processed = record.extract_state == 'waiting_extraction'

    @api.depends('is_in_extractable_state', 'extract_state', 'message_main_attachment_id')
    def _compute_show_send_button(self):
        """ Show the send button only when extraction is allowed, a main
        attachment exists, nothing was requested yet and the record is in an
        extractable state.
        """
        for record in self:
            # bool(): the `and` chain may otherwise yield a recordset.
            record.extract_can_show_send_button = bool(
                record._get_ocr_option_can_extract()
                and record.message_main_attachment_id
                and record.extract_state == 'no_extract_requested'
                and record.is_in_extractable_state
            )

    @api.depends()
    def _compute_is_in_extractable_state(self):
        """ Compute the is_in_extractable_state field. This method is meant to be overridden """
        return None

    def _get_iap_account(self):
        """ Return the 'invoice_ocr' IAP account, restricted to the record's
        company when `company_id` is set.

        NOTE(review): `company_id` is not declared by this mixin; it is
        presumably provided by the inheriting model - confirm.
        """
        if self.company_id:
            return self.env['iap.account'].with_context(allowed_company_ids=[self.company_id.id]).get('invoice_ocr')
        else:
            return self.env['iap.account'].get('invoice_ocr')

    @api.model
    def check_all_status(self):
        """ Check the OCR status of every record matching `_get_to_check_domain`. """
        for record in self.search(self._get_to_check_domain()):
            record._try_to_check_ocr_status()

    @api.model
    def _contact_iap_extract(self, pathinfo, params):
        """ Contact the IAP extract service and return the response. This method is meant to be overridden """
        return {}

    @api.model
    def _cron_validate(self):
        """ Send the validated values of every record matching
        `_get_validation_domain` to the extract service, then mark all of
        them as 'done'.

        :return: the recordset that was processed
        """
        records_to_validate = self.search(self._get_validation_domain())
        for record in records_to_validate:
            try:
                self._contact_iap_extract(
                    'validate',
                    params={
                        'document_token': record.extract_document_uuid,
                        'values': {
                            field: record._get_validation(field) for field in self._get_validation_fields()
                        }
                    }
                )
            except AccessError:
                # Best effort: a failed call must not block the other
                # records, but it should at least leave a trace.
                _logger.warning(
                    "Couldn't send the validation of %s with id %s to the extract service",
                    record._name, record.id,
                )
        records_to_validate.extract_state = 'done'
        return records_to_validate

    @staticmethod
    def _get_ocr_selected_value(ocr_results, feature, default=None):
        """ Return `ocr_results[feature]['selected_value']['content']`.

        `default` is returned when the feature, its 'selected_value' or the
        'content' key is missing, and also when the feature or its
        'selected_value' is present but empty/None.
        """
        feature_result = ocr_results.get(feature) or {}
        selected_value = feature_result.get('selected_value') or {}
        return selected_value.get('content', default)

    def _safe_upload(self):
        """
        This function prevents any exception from being thrown during the upload of a document.
        This is meant to be used for batch uploading where we don't want that an error rollbacks the whole transaction.
        """
        try:
            # The savepoint confines a failed upload to this record.
            with self.env.cr.savepoint():
                self.with_company(self.company_id)._upload_to_extract()
        except Exception as e:
            # Database-level errors leave the extract fields untouched.
            if not isinstance(e, (IntegrityError, OperationalError)):
                self.extract_state = 'error_status'
                self.extract_status = 'error_internal'
            # NOTE(review): indentation was lost in the reviewed source; the
            # notification and the log are assumed to run for every
            # exception, not only for the non-database ones - confirm.
            self.env['iap.account']._send_error_notification(
                message=self._get_iap_bus_notification_error(),
            )
            _logger.warning("Couldn't upload %s with id %d: %s", self._name, self.id, e)

    def _send_batch_for_digitization(self):
        """ Upload every record of `self`, one by one, through `_safe_upload`. """
        for rec in self:
            rec._safe_upload()

    def action_send_batch_for_digitization(self):
        """ Send the selected documents for digitization.

        Only documents whose state is 'no_extract_requested',
        'not_enough_credit' or 'error_status' are sent; a status notification
        is emitted when some or all of the selection is skipped.

        :raises UserError: when a selected document is not in an extractable state
        :return: a window action on the documents sent (form view for a
            single document, list otherwise), or None when nothing was sent
        """
        if any(not document.is_in_extractable_state for document in self):
            raise UserError(self._get_user_error_invalid_state_message())

        documents_to_send = self.filtered(
            lambda doc: doc.extract_state in ('no_extract_requested', 'not_enough_credit', 'error_status')
        )
        if not documents_to_send:
            self.env['iap.account']._send_status_notification(
                message=_('The selected documents are already digitized'),
                status='info',
            )
            return

        if len(documents_to_send) < len(self):
            self.env['iap.account']._send_status_notification(
                message=_('Some documents were skipped as they were already digitized'),
                status='info',
            )

        documents_to_send._send_batch_for_digitization()

        if len(documents_to_send) == 1:
            return {
                'name': _('Document sent for digitization'),
                'type': 'ir.actions.act_window',
                'res_model': self._name,
                'view_mode': 'form',
                'views': [[False, 'form']],
                'res_id': documents_to_send[0].id,
            }
        return {
            'name': _('Documents sent for digitization'),
            'type': 'ir.actions.act_window',
            'res_model': self._name,
            'view_mode': 'tree,form',
            'target': 'current',
            'domain': [('id', 'in', documents_to_send.ids)],
        }

    def action_manual_send_for_digitization(self):
        """ Manually trigger the ocr flow for the records.
        This function is meant to be overridden, and called with a title.
        """
        self._upload_to_extract()

    def buy_credits(self):
        """ Return a URL action pointing to the 'invoice_ocr' credits page. """
        url = self.env['iap.account'].get_credits_url(base_url='', service_name='invoice_ocr')
        return {
            'type': 'ir.actions.act_url',
            'url': url,
        }

    def check_ocr_status(self):
        """ Actively check the status of the extraction on the concerned records. """
        records_to_check = self.filtered(lambda a: a.extract_state in ['waiting_extraction', 'extract_not_ready'])
        for record in records_to_check:
            record._check_ocr_status()

        # Use what is left of a budget of 20 checks on other pending records.
        limit = max(0, 20 - len(records_to_check))
        if limit > 0:
            records_to_preupdate = self.search([
                ('extract_state', 'in', ['waiting_extraction', 'extract_not_ready']),
                ('id', 'not in', records_to_check.ids),
                ('is_in_extractable_state', '=', True)], limit=limit)
            for record in records_to_preupdate:
                record._try_to_check_ocr_status()

    def _get_user_infos(self):
        """ Return the language and e-mail of the current user, as sent along
        with the 'parse' request.
        """
        return {
            'user_lang': self.env.user.lang,
            'user_email': self.env.user.email,
        }

    def _get_validation(self, field):
        """ Return the validation of the record. This method is meant to be overridden """
        return None

    def _upload_to_extract(self):
        """ Contacts IAP extract to parse the first attachment in the chatter.

        Nothing is sent unless `_get_ocr_option_can_extract` allows it, a main
        attachment exists and the state is 'no_extract_requested',
        'not_enough_credit' or 'error_status'. `extract_state` and
        `extract_status` are updated from the response.

        :return: False when extraction is not allowed, None otherwise
        """
        self.ensure_one()
        if not self._get_ocr_option_can_extract():
            return False
        attachment = self.message_main_attachment_id
        if attachment and self.extract_state in ['no_extract_requested', 'not_enough_credit', 'error_status']:
            account_token = self._get_iap_account()
            if not account_token.account_token:
                self.extract_state = 'error_status'
                self.extract_status = 'error_invalid_account_token'
                return

            user_infos = self._get_user_infos()
            params = {
                'dbuuid': self.env['ir.config_parameter'].sudo().get_param('database.uuid'),
                'documents': [x.datas.decode('utf-8') for x in attachment],
                'user_infos': user_infos,
                'webhook_url': self._get_webhook_url(),
            }
            try:
                result = self._contact_iap_extract('parse', params=params)
                self.extract_status = result['status']
                if result['status'] == 'success':
                    self.extract_state = 'waiting_extraction'
                    self.extract_document_uuid = result['document_token']
                    # Reset the flag read by `_send_no_credit_notification`.
                    if self.env['ir.config_parameter'].sudo().get_param("iap_extract.already_notified", True):
                        self.env['ir.config_parameter'].sudo().set_param("iap_extract.already_notified", False)
                    # NOTE(review): indentation was lost in the reviewed
                    # source; the success notification is assumed to be sent
                    # on every successful upload - confirm.
                    self.env['iap.account']._send_success_notification(
                        message=self._get_iap_bus_notification_success(),
                    )
                    self._upload_to_extract_success_callback()
                elif result['status'] == 'error_no_credit':
                    self._send_no_credit_notification()
                    self.extract_state = 'not_enough_credit'
                else:
                    self.extract_state = 'error_status'
                    _logger.warning(
                        'An error occurred during OCR parsing of %s %d. Status: %s',
                        self._name, self.id, self.extract_status,
                    )
            except AccessError:
                self.extract_state = 'error_status'
                self.extract_status = 'error_no_connection'
            # NOTE(review): assumed to sit at the same level as the `try`
            # above (indentation was lost in the reviewed source) - confirm.
            if self.extract_state == 'error_status':
                self.env['iap.account']._send_error_notification(
                    message=self._get_iap_bus_notification_error(),
                )

    def _send_no_credit_notification(self):
        """
        Notify about the number of credit.
        In order to avoid to spam people each hour, an ir.config_parameter is set
        """
        self.env['iap.account']._send_no_credit_notification(
            service_name='invoice_ocr',
            title=_("Not enough credits for data extraction"),
        )
        # If we don't find the config parameter, we consider it True, because
        # we don't want to notify if no credits have been bought earlier.
        already_notified = self.env['ir.config_parameter'].sudo().get_param("iap_extract.already_notified", True)
        if already_notified:
            return
        try:
            mail_template = self.env.ref('iap_extract.iap_extract_no_credit')
        except ValueError:
            # The mail template has not been created by an upgrade of the module.
            return
        iap_account = self._get_iap_account()
        if iap_account:
            # Read the e-mail of the user with database id 2.
            # NOTE(review): presumably the default administrator - confirm
            # that a hard-coded id is intended here.
            res = self.env['res.users'].search_read([('id', '=', 2)], ['email'])
            if res:
                email_values = {
                    'email_to': res[0]['email']
                }
                mail_template.send_mail(iap_account.id, force_send=True, email_values=email_values)
                self.env['ir.config_parameter'].sudo().set_param("iap_extract.already_notified", True)

    def _validate_ocr(self):
        """ Move the records in 'waiting_validation' to 'to_validate' and
        trigger the validation cron, delayed by the `ocr_trigger_delta`
        context value in minutes (0 when absent).
        """
        documents_to_validate = self.filtered(lambda doc: doc.extract_state == 'waiting_validation')
        documents_to_validate.extract_state = 'to_validate'
        if documents_to_validate:
            ocr_trigger_datetime = fields.Datetime.now() + relativedelta(minutes=self.env.context.get('ocr_trigger_delta', 0))
            self._get_cron_ocr('validate')._trigger(at=ocr_trigger_datetime)

    def _check_ocr_status(self, force_write=False):
        """ Contact iap to get the actual status of the ocr request.

        On 'success' the record moves to 'waiting_validation' and is filled
        with the first entry of the returned results; on 'processing' it
        moves to 'extract_not_ready'; any other status moves it to
        'error_status'.

        :param force_write: forwarded to `_fill_document_with_results`
        """
        self.ensure_one()
        result = self._contact_iap_extract('get_result', params={'document_token': self.extract_document_uuid})
        self.extract_status = result['status']
        if result['status'] == 'success':
            self.extract_state = 'waiting_validation'
            # Set OdooBot as the author of the tracking message
            self._track_set_author(self.env.ref('base.partner_root'))
            ocr_results = result['results'][0]
            self.with_company(self.company_id)._fill_document_with_results(ocr_results, force_write=force_write)
            if 'full_text_annotation' in ocr_results:
                # Store the recognized text on the main attachment.
                self.message_main_attachment_id.index_content = ocr_results['full_text_annotation']
        elif result['status'] == 'processing':
            self.extract_state = 'extract_not_ready'
        else:
            self.extract_state = 'error_status'

    def _fill_document_with_results(self, ocr_results, force_write=False):
        """ Fill the document with the results of the OCR. This method is meant to be overridden """
        raise NotImplementedError()

    def _get_cron_ocr(self, ocr_action):
        """ Return the cron used to validate the documents, based on the module name.
        ocr_action can be 'validate'.
        """
        module_name = self._get_ocr_module_name()
        return self.env.ref(f'{module_name}.ir_cron_ocr_{ocr_action}')

    def _get_iap_bus_notification_success(self):
        """ Return the message notified after a successful upload. """
        return _("Document is being digitized")

    def _get_iap_bus_notification_error(self):
        """ Return the message notified after a failed upload. """
        return _("An error occurred during the upload")

    def _get_ocr_module_name(self):
        """ Returns the name of the module. This method is meant to be overridden """
        return 'iap_extract'

    def _get_ocr_option_can_extract(self):
        """ Returns if we can use the extract capabilities of the module. This method is meant to be overridden """
        return False

    def _get_to_check_domain(self):
        """ Return the domain of the records whose OCR status should be checked. """
        return [('is_in_extractable_state', '=', True),
                ('extract_state', 'in', ['waiting_extraction', 'extract_not_ready'])]

    def _get_validation_domain(self):
        """ Return the domain of the records handled by `_cron_validate`. """
        return [('extract_state', '=', 'to_validate')]

    def _get_validation_fields(self):
        """ Returns the fields that should be checked to validate the record. This method is meant to be overridden """
        return []

    def _get_webhook_url(self):
        """ Return the webhook url based on the module name. """
        baseurl = self.get_base_url()
        module_name = self._get_ocr_module_name()
        return f'{baseurl}/{module_name}/request_done'

    def _get_user_error_invalid_state_message(self):
        """
        Returns the message of the UserError when the user tries to send a document in an invalid state.
        This method is meant to be overridden.
        """
        return ''

    def _upload_to_extract_success_callback(self):
        """ This method is called when the OCR flow is successful. This method is meant to be overridden """
        return None

    def _try_to_check_ocr_status(self):
        """ Check the OCR status of the record without letting any exception
        escape; a failure is logged as a warning.
        """
        self.ensure_one()
        try:
            with self.env.cr.savepoint():
                self._check_ocr_status()
            # NOTE(review): indentation was lost in the reviewed source; the
            # commit is assumed to follow the savepoint block - confirm.
            self.env.cr.commit()
        except Exception as e:
            _logger.warning("Couldn't check OCR status of %s with id %d: %s", self._name, self.id, e)