Skip to content

RecursionError: maximum recursion depth exceeded #412

@userException

Description

@userException

Bug 💥
I get a "RecursionError: maximum recursion depth exceeded" error while following the Get Started notebook: https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb

Desktop (please complete the following information, if any other than the one in the install requirements):

  • OS: macOS

Additional context 🧬
Please find the code below.

import deepdoctection as dd
from pathlib import Path
from matplotlib import pyplot as plt
from IPython.core.display import HTML

# Simple PDF extraction example using DeepDoctection
# Make sure you have installed: pip install transformers python-doctr deepdoctection

def extract_pdf_text(pdf_path):
    """Run the built-in deepdoctection analyzer on a PDF and pull its first page.

    Args:
        pdf_path: Path of the PDF, forwarded to ``analyzer.analyze(path=...)``.

    Returns:
        None. The first page is fetched but nothing is returned or printed.
    """
    pipeline = dd.get_dd_analyzer()  # instantiate the built-in analyzer
    dataflow = pipeline.analyze(path=pdf_path)
    dataflow.reset_state()  # Trigger some initialization
    first_page = next(iter(dataflow))
    type(first_page)
    # print(first_page.text)

# Script entry point: run the extraction against "sample.pdf" when executed directly.
if __name__ == "__main__":
    extract_pdf_text("sample.pdf")

Here are the error logs:

[0622 22:36.56 @file_utils.py:30] INF PyTorch version 2.7.1 available.
[0622 22:36.56 @file_utils.py:93] INF Disabling Tensorflow because USE_TORCH is set
[0622 22:37.02 @dd.py:129] INF Config:
{'DEVICE': device(type='mps'),
'LANGUAGE': None,
'LAYOUT_LINK': {'CHILD_CATEGORIES': [<LayoutType.CAPTION>],
'PARENTAL_CATEGORIES': [<LayoutType.FIGURE>, <LayoutType.TABLE>]},
'LAYOUT_NMS_PAIRS': {'COMBINATIONS': [[<LayoutType.TABLE>, <LayoutType.TITLE>],
[<LayoutType.TABLE>, <LayoutType.TEXT>],
[<LayoutType.TABLE>, <LayoutType.KEY_VALUE_AREA>],
[<LayoutType.TABLE>, <LayoutType.LIST_ITEM>],
[<LayoutType.TABLE>, <LayoutType.LIST>],
[<LayoutType.TABLE>, <LayoutType.FIGURE>],
[<LayoutType.TITLE>, <LayoutType.TEXT>],
[<LayoutType.TEXT>, <LayoutType.KEY_VALUE_AREA>],
[<LayoutType.TEXT>, <LayoutType.LIST_ITEM>],
[<LayoutType.TEXT>, <LayoutType.CAPTION>],
[<LayoutType.KEY_VALUE_AREA>, <LayoutType.LIST_ITEM>],
[<LayoutType.FIGURE>, <LayoutType.CAPTION>]],
'PRIORITY': [<LayoutType.TABLE>, <LayoutType.TABLE>, <LayoutType.TABLE>,
<LayoutType.TABLE>, <LayoutType.TABLE>, <LayoutType.TABLE>,
<LayoutType.TEXT>, <LayoutType.TEXT>, None, <LayoutType.CAPTION>,
<LayoutType.KEY_VALUE_AREA>, <LayoutType.FIGURE>],
'THRESHOLDS': [0.001, 0.01, 0.01, 0.001, 0.01, 0.01, 0.05, 0.01, 0.01, 0.01,
0.01, 0.001]},
'LIB': 'PT',
'OCR': {'CONFIG': {'TESSERACT': 'dd/conf_tesseract.yaml'},
'USE_DOCTR': True,
'USE_TESSERACT': False,
'USE_TEXTRACT': False,
'WEIGHTS': {'DOCTR_RECOGNITION': {'PT': 'doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt',
'TF': 'doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip'},
'DOCTR_WORD': {'PT': 'doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt',
'TF': 'doctr/db_resnet50/tf/db_resnet50-adcafc63.zip'}}},
'PDF_MINER': {'X_TOLERANCE': 3, 'Y_TOLERANCE': 3},
'PT': {'CELL': {'FILTER': None,
'PAD': {'BOTTOM': 60, 'LEFT': 60, 'RIGHT': 60, 'TOP': 60},
'PADDING': False,
'WEIGHTS': 'cell/d2_model_1849999_cell_inf_only.pt',
'WEIGHTS_TS': 'cell/d2_model_1849999_cell_inf_only.ts'},
'ENFORCE_WEIGHTS': {'CELL': False, 'ITEM': False, 'LAYOUT': False},
'ITEM': {'FILTER': ['table'],
'PAD': {'BOTTOM': 60, 'LEFT': 60, 'RIGHT': 60, 'TOP': 60},
'PADDING': False,
'WEIGHTS': 'deepdoctection/tatr_tab_struct_v2/pytorch_model.bin',
'WEIGHTS_TS': 'item/d2_model_1639999_item_inf_only.ts'},
'LAYOUT': {'FILTER': None,
'PAD': {'BOTTOM': 0, 'LEFT': 0, 'RIGHT': 0, 'TOP': 0},
'PADDING': False,
'WEIGHTS': 'Aryn/deformable-detr-DocLayNet/model.safetensors',
'WEIGHTS_TS': 'layout/d2_model_0829999_layout_inf_only.ts'}},
'SEGMENTATION': {'ASSIGNMENT_RULE': 'ioa',
'CELL_NAMES': [<CellType.HEADER>, <CellType.BODY>, <LayoutType.CELL>],
'FULL_TABLE_TILING': True,
'ITEM_NAMES': [<LayoutType.ROW>, <LayoutType.COLUMN>],
'PUBTABLES_CELL_NAMES': [<LayoutType.CELL>],
'PUBTABLES_ITEM_HEADER_CELL_NAMES': [<CellType.COLUMN_HEADER>,
<CellType.ROW_HEADER>,
<CellType.PROJECTED_ROW_HEADER>],
'PUBTABLES_ITEM_HEADER_THRESHOLDS': [0.6, 0.0001],
'PUBTABLES_ITEM_NAMES': [<LayoutType.ROW>, <LayoutType.COLUMN>],
'PUBTABLES_SPANNING_CELL_NAMES': [<CellType.SPANNING>],
'PUBTABLES_SUB_ITEM_NAMES': [<CellType.ROW_NUMBER>, <CellType.COLUMN_NUMBER>],
'REMOVE_IOU_THRESHOLD_COLS': 0.2,
'REMOVE_IOU_THRESHOLD_ROWS': 0.2,
'STRETCH_RULE': 'equal',
'SUB_ITEM_NAMES': [<CellType.ROW_NUMBER>, <CellType.COLUMN_NUMBER>],
'TABLE_NAME': <LayoutType.TABLE>,
'THRESHOLD_COLS': 0.4,
'THRESHOLD_ROWS': 0.4},
'TEXT_CONTAINER': <LayoutType.WORD>,
'TEXT_ORDERING': {'BROKEN_LINE_TOLERANCE': 0.003,
'FLOATING_TEXT_BLOCK_CATEGORIES': (<LayoutType.TEXT>, <LayoutType.TITLE>,
<LayoutType.LIST>,
<LayoutType.KEY_VALUE_AREA>),
'HEIGHT_TOLERANCE': 2.0,
'INCLUDE_RESIDUAL_TEXT_CONTAINER': True,
'PARAGRAPH_BREAK': 0.035,
'STARTING_POINT_TOLERANCE': 0.005,
'TEXT_BLOCK_CATEGORIES': (<LayoutType.TEXT>, <LayoutType.TITLE>,
<LayoutType.LIST_ITEM>, <LayoutType.LIST>,
<LayoutType.CAPTION>, <LayoutType.PAGE_HEADER>,
<LayoutType.PAGE_FOOTER>, <LayoutType.PAGE_NUMBER>,
<LayoutType.MARK>, <LayoutType.KEY_VALUE_AREA>,
<LayoutType.FIGURE>, <CellType.SPANNING>,
<LayoutType.CELL>)},
'TF': {'CELL': {'FILTER': None, 'WEIGHTS': 'cell/model-1800000_inf_only.data-00000-of-00001'},
'ITEM': {'FILTER': None, 'WEIGHTS': 'item/model-1620000_inf_only.data-00000-of-00001'},
'LAYOUT': {'FILTER': None, 'WEIGHTS': 'layout/model-800000_inf_only.data-00000-of-00001'}},
'USE_LAYOUT': True,
'USE_LAYOUT_LINK': False,
'USE_LAYOUT_NMS': True,
'USE_LINE_MATCHER': False,
'USE_OCR': True,
'USE_PDF_MINER': False,
'USE_ROTATOR': False,
'USE_TABLE_REFINEMENT': False,
'USE_TABLE_SEGMENTATION': True,
'WORD_MATCHING': {'MAX_PARENT_ONLY': True,
'PARENTAL_CATEGORIES': (<LayoutType.TEXT>, <LayoutType.TITLE>,
<LayoutType.LIST_ITEM>, <LayoutType.LIST>,
<LayoutType.CAPTION>, <LayoutType.PAGE_HEADER>,
<LayoutType.PAGE_FOOTER>, <LayoutType.PAGE_NUMBER>,
<LayoutType.MARK>, <LayoutType.KEY_VALUE_AREA>,
<LayoutType.FIGURE>, <CellType.SPANNING>,
<LayoutType.CELL>),
'RULE': 'ioa',
'THRESHOLD': 0.3}}
[0622 22:37.03 @fs.py:138] INF File db_resnet50-ac60cadc.pt exists! Skip download.
Traceback (most recent call last):
File "/Users/john/Code/Python/DeepDoc/.projectenv/lib/python3.12/site-packages/deepdoctection/utils/file_utils.py", line 667, in get_doctr_requirement
return get_doctr_requirement()
^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/john/Code/Python/DeepDoc/.projectenv/lib/python3.12/site-packages/deepdoctection/utils/file_utils.py", line 667, in get_doctr_requirement
return get_doctr_requirement()
^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/john/Code/Python/DeepDoc/.projectenv/lib/python3.12/site-packages/deepdoctection/utils/file_utils.py", line 667, in get_doctr_requirement
return get_doctr_requirement()
^^^^^^^^^^^^^^^^^^^^^^^
[Previous line repeated 995 more times]
File "/Users/john/Code/Python/DeepDoc/.projectenv/lib/python3.12/site-packages/deepdoctection/utils/file_utils.py", line 666, in get_doctr_requirement
if not get_poppler_version():
^^^^^^^^^^^^^^^^^^^^^
File "/Users/john/Code/Python/DeepDoc/.projectenv/lib/python3.12/site-packages/deepdoctection/utils/file_utils.py", line 428, in get_poppler_version
if pdf_to_ppm_available():
^^^^^^^^^^^^^^^^^^^^^^
RecursionError: maximum recursion depth exceeded

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions