Source code for ferc_xbrl_extractor.arelle_interface

"""Abstract away interface to Arelle XBRL Library."""

import io
import time
from pathlib import Path
from typing import Literal

import pydantic
import stringcase
from arelle import Cntlr, FileSource, ModelManager, ModelXbrl, XbrlConst
from arelle.ModelDtsObject import ModelConcept
from arelle.ViewFileRelationshipSet import ViewRelationshipSet
from pydantic import BaseModel


def _taxonomy_view(taxonomy_source: str | FileSource.FileSource, max_retries: int = 7):
    """Load a taxonomy with Arelle and build a view of its relationships.

    ``ModelXbrl.load`` is retried when it raises ``FileExistsError``, sleeping
    ``2 ** attempt`` seconds between attempts (2s, 4s, 8s, ...). Loading stops
    as soon as one attempt succeeds.

    Args:
        taxonomy_source: Location of the taxonomy as a string, or an Arelle
            ``FileSource`` wrapping it.
        max_retries: Maximum number of load attempts. Must be at least 1.

    Returns:
        A ``(taxonomy, view)`` tuple: the object returned by ``ModelXbrl.load``
        and the ``ViewRelationshipSet`` populated from its parent-child
        relationships.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        FileExistsError: If the final load attempt still fails.
    """
    if max_retries < 1:
        # Without at least one attempt there would be no taxonomy to view.
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    cntlr = Cntlr.Cntlr()
    cntlr.startLogging(logFileName="logToPrint")
    model_manager = ModelManager.initialize(cntlr)

    for try_count in range(max_retries):
        try:
            cntlr.logger.debug(f"Try #{try_count}: {taxonomy_source=}")
            taxonomy = ModelXbrl.load(model_manager, taxonomy_source)
            # Success: stop retrying. (Continuing the loop here would reload
            # the taxonomy on every remaining iteration.)
            break
        except FileExistsError:
            if (try_count + 1) == max_retries:
                # Out of attempts; propagate with the original traceback.
                raise
            # NOTE(review): presumably FileExistsError comes from concurrent
            # writes to Arelle's cache - confirm against the caller's setup.
            backoff = 2 ** (try_count + 1)
            cntlr.logger.warning(f"Failed try #{try_count}, retrying in {backoff}s")
            time.sleep(backoff)

    view = ViewRelationshipSet(taxonomy, "taxonomy.json", "roles", None, None, None)
    view.view(XbrlConst.parentChild, None, None, None)

    return taxonomy, view
def load_taxonomy(path: Path):
    """Load an XBRL taxonomy and parse its relationships.

    Args:
        path: URL or local path pointing to an XBRL taxonomy.

    Returns:
        Whatever ``_taxonomy_view`` returns for the stringified ``path``.
    """
    # Arelle expects a plain ``str`` location rather than a ``Path``
    # (true as of version 2.12.2), so convert before handing it over.
    return _taxonomy_view(str(path))
def load_taxonomy_from_archive(taxonomy_archive: io.BytesIO, entry_point: Path):
    """Load an XBRL taxonomy stored inside a zipfile archive.

    Args:
        taxonomy_archive: In-memory taxonomy archive.
        entry_point: Relative path to the taxonomy entry point within the
            archive.

    Returns:
        Whatever ``_taxonomy_view`` returns for the opened archive source.
    """
    entry_point_name = str(entry_point)
    # Wrap the in-memory zip stream in an Arelle FileSource rooted at the
    # entry point.
    archive_source = FileSource.openFileSource(
        entry_point_name,
        sourceZipStream=taxonomy_archive,
    )
    return _taxonomy_view(archive_source)
class References(BaseModel):
    """Pydantic model that defines XBRL references.

    FERC uses XBRL references to link Concepts defined in its taxonomy to the
    physical paper form. These are included in the output metadata and can be
    useful for linking between XBRL and DBF data.

    This model is not a generic representation of XBRL references, but is
    specific to those used by FERC.
    """

    # Populated from the "Account" key; stays ``None`` when that key is
    # absent, so the annotation has to admit ``None`` as well as ``str``.
    account: str | None = pydantic.Field(None, alias="Account")

    # Populated from the "Form Location" key; each entry is one mapping of
    # string keys to string values. Defaults to an empty list.
    form_location: list[dict[str, str]] = pydantic.Field([], alias="Form Location")
class Calculation(BaseModel):
    """Pydantic model describing one XBRL calculation relationship.

    Calculation relationships are part of the extracted metadata. They are a
    validation tool that relates facts through a mathematical formula: for
    example, stating that one fact equals the sum of two or more other facts,
    which can then be used to validate a filing.
    """

    # Name of the fact that takes part in the calculation.
    name: str

    # Weight attached to that fact in the calculation.
    weight: float
class Metadata(BaseModel):
    """Pydantic model holding the metadata extracted from an XBRL taxonomy.

    Taxonomies contain various metadata that are useful for interpreting XBRL
    filings. The fields extracted here are references, calculations, and
    balances.
    """

    name: str
    references: References
    calculations: list[Calculation]
    balance: Literal["credit", "debit"] | None = None

    @classmethod
    def from_concept(cls, concept: ModelConcept) -> "Metadata":
        """Build the metadata record for a single XBRL Concept.

        Args:
            concept: Concept to extract metadata from.

        Returns:
            A ``Metadata`` instance carrying the concept's snake_case name, its
            references, its calculations, and its balance.
        """
        # Names are snake_cased so they match the output DB.
        snake_name = stringcase.snakecase(concept.name)

        reference_rels = concept.modelXbrl.relationshipSet(
            XbrlConst.conceptReference
        ).fromModelObject(concept)

        references_by_role = {}
        for rel in reference_rels:
            resource = rel.toModelObject
            role_name = resource.modelXbrl.roleTypeDefinition(resource.role)

            # One reference can be made up of several parts; collect them all.
            parts = {
                child.localName: child.stringValue
                for child in resource.iterchildren()
            }

            # Several references may share a name, so they accumulate in a list.
            entries = references_by_role.setdefault(role_name, [])
            entries.append(parts)

            # A lone reference whose only part is keyed by the reference name
            # itself is flattened to that bare value.
            if len(entries) == 1 and len(parts) == 1 and role_name in parts:
                references_by_role[role_name] = parts[role_name]

        calculation_rels = concept.modelXbrl.relationshipSet(
            XbrlConst.summationItem
        ).fromModelObject(concept)
        calculation_entries = [
            {
                "name": stringcase.snakecase(rel.toModelObject.name),
                "weight": rel.weight,
            }
            for rel in calculation_rels
        ]

        return cls(
            name=snake_name,
            references=references_by_role,
            calculations=calculation_entries,
            balance=concept.balance,
        )