Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
093793c
chore(deps): bump authlib from 1.6.11 to 1.6.12 in ContentProcessorWo…
Shreyas-Microsoft Jun 1, 2026
68f54be
build(deps): bump idna from 3.11 to 3.15 in ContentProcessorAPI
Shreyas-Microsoft Jun 1, 2026
03bcf44
build(deps): bump idna from 3.11 to 3.15 in ContentProcessor
Shreyas-Microsoft Jun 1, 2026
77de405
chore(deps): bump idna from 3.11 to 3.15 in ContentProcessorWorkflow …
Shreyas-Microsoft Jun 1, 2026
7f176e3
fix: resolve merge conflict in ContentProcessorWorkflow/uv.lock
Prachig-Microsoft Jun 12, 2026
cdf62bd
Merge pull request #606 from microsoft/psl-sw/44960-dependabot-upgrades
Roopan-Microsoft Jun 12, 2026
ec8d7a0
Resolve CodeQL issues
chaudhariniraj Jun 12, 2026
8fc57b8
Resolve test cases error
chaudhariniraj Jun 12, 2026
e11892e
Resolve test cases error 1
chaudhariniraj Jun 12, 2026
cab11be
Resolve test cases error 2
chaudhariniraj Jun 12, 2026
cf8dfc7
Resolve test cases error 3
chaudhariniraj Jun 12, 2026
3f193ba
fix: Resolve CodeQL issues to avoid unsafe use of DefaultAzureCredent…
Avijit-Microsoft Jun 12, 2026
dd66aa7
fix(scoring): show N/A instead of 0% for unavailable entity/schema sc…
Prachig-Microsoft Jun 15, 2026
56c724f
refactor(scoring): structural completeness fallback instead of N/A
Prachig-Microsoft Jun 15, 2026
f9c9955
fix(lint): remove unused DefaultAzureCredential import and fix indent…
Prachig-Microsoft Jun 15, 2026
8d7b592
fix(lint): remove trailing blank line at EOF (W391)
Prachig-Microsoft Jun 15, 2026
1fee0b1
Merge pull request #619 from microsoft/psl-entity-score
Avijit-Microsoft Jun 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ContentProcessor/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ colorama==0.4.6
coverage==7.13.5
cryptography==46.0.7
dnspython==2.8.0
idna==3.11
idna==3.15
iniconfig==2.3.0
isodate==0.7.2
mongomock==4.3.0
Expand Down
110 changes: 92 additions & 18 deletions src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,20 +112,14 @@ def find_process_result(step_name: str):
)
)

total_evaluated_fields_count = evaluated_result.confidence.get(
"total_evaluated_fields_count", 0
)
schema_score = (
0
if total_evaluated_fields_count == 0
else round(
(
len(evaluated_result.comparison_result.items)
- evaluated_result.confidence["zero_confidence_fields_count"]
)
/ len(evaluated_result.comparison_result.items),
3,
)
# Compute the aggregate scores. Successful (Completed) processing
# always yields numeric scores: when probabilistic confidence is
# available (logprobs from non-reasoning models / Content Understanding
# signal) we use it; otherwise we fall back to a structural
# completeness score (fraction of expected fields actually filled).
# Failed runs and genuinely empty extractions remain at ``0.0``.
entity_score, schema_score, min_extracted_entity_score = (
self._derive_aggregate_scores(evaluated_result)
)

processed_result = ContentProcess(
Expand All @@ -143,11 +137,9 @@ def find_process_result(step_name: str):
self._current_message_context.data_pipeline.pipeline_status.creation_time,
"%Y-%m-%dT%H:%M:%S.%fZ",
),
entity_score=evaluated_result.confidence["overall_confidence"],
entity_score=entity_score,
schema_score=schema_score,
min_extracted_entity_score=evaluated_result.confidence[
"min_extracted_field_confidence"
],
min_extracted_entity_score=min_extracted_entity_score,
prompt_tokens=evaluated_result.prompt_tokens,
completion_tokens=evaluated_result.completion_tokens,
target_schema=Schema.get_schema(
Expand Down Expand Up @@ -241,3 +233,85 @@ def _summarize_processed_time(self, step_results: list[StepResult]) -> str:
# Format the total elapsed time as a string
formatted_elapsed_time = f"{total_hours:02}:{total_minutes:02}:{total_seconds:02}.{total_milliseconds:03}"
return formatted_elapsed_time

@staticmethod
def _is_filled_value(value: object) -> bool:
    """Report whether an extracted value carries any real content.

    Rules, applied in order:

    * ``None`` is never filled.
    * A string is filled only if something remains after stripping
      whitespace.
    * A dict is filled if at least one of its *values* is filled; a
      list, tuple or set is filled if at least one element is filled.
      The check recurses, so a nested structure made up solely of
      nulls / blanks counts as empty, as does an empty container.
    * Anything else (including booleans and numbers, so ``False`` and
      ``0`` too) is considered filled.
    """
    if value is None:
        return False

    if isinstance(value, str):
        return bool(value.strip())

    # Pick the children to inspect for container types; every other
    # type (bool, int, float, arbitrary objects, ...) is filled as-is.
    if isinstance(value, dict):
        children = value.values()
    elif isinstance(value, (list, tuple, set)):
        children = value
    else:
        return True

    # A container is filled as soon as one child is filled.
    for child in children:
        if SaveHandler._is_filled_value(child):
            return True
    return False

@staticmethod
def _derive_aggregate_scores(
    evaluated_result: DataExtractionResult,
) -> tuple[float, float, float]:
    """Compute ``(entity_score, schema_score, min_extracted_entity_score)``.

    Score selection order:

    1. **Probabilistic confidence** — used when the confidence payload
       reports ``total_evaluated_fields_count > 0`` *and* a comparison
       table is present. ``entity_score`` is ``overall_confidence``,
       ``min_extracted_entity_score`` is
       ``min_extracted_field_confidence``, and ``schema_score`` is the
       fraction of comparison rows that are *not* counted in
       ``zero_confidence_fields_count`` (rounded to 3 decimals and
       clamped to ``[0, 1]``).

    2. **Structural completeness fallback** — used when no
       probabilistic signal is available (for example, models that do
       not return logprobs) but a comparison table exists. All three
       scores are the fraction of comparison rows whose ``Extracted``
       value is actually filled, rounded to 3 decimals.

    3. **Zero** — when there are no comparison rows at all (failed
       pipeline / genuinely empty result) every score is ``0.0``.

    Args:
        evaluated_result: Result of the evaluate step. Only its
            ``confidence`` mapping and ``comparison_result.items`` are
            read; either may be ``None``.

    Returns:
        A ``(entity_score, schema_score, min_extracted_entity_score)``
        tuple of floats.
    """
    no_scores = (0.0, 0.0, 0.0)

    confidence = evaluated_result.confidence or {}
    comparison_result = evaluated_result.comparison_result
    # Tolerate both a missing comparison result and a ``None`` item list.
    comparison_items = (
        comparison_result.items if comparison_result is not None else None
    ) or []

    # Path 3: nothing to score on. Both other paths need at least one
    # comparison row (they divide by the row count).
    if not comparison_items:
        return no_scores

    item_count = len(comparison_items)

    # ``or 0`` (rather than a ``.get`` default) also covers keys that are
    # present but explicitly ``None``, matching how the confidence
    # values below are read.
    evaluated_count = confidence.get("total_evaluated_fields_count") or 0

    # Path 1: probabilistic confidence
    if evaluated_count > 0:
        zero_count = confidence.get("zero_confidence_fields_count") or 0
        non_zero_ratio = (item_count - zero_count) / item_count
        # The zero-confidence count and the comparison table come from
        # different places; clamp so a mismatch can never surface as a
        # negative or >100% score.
        schema_score = round(min(1.0, max(0.0, non_zero_ratio)), 3)
        entity_score = float(confidence.get("overall_confidence") or 0.0)
        min_extracted_entity_score = float(
            confidence.get("min_extracted_field_confidence") or 0.0
        )
        return (entity_score, schema_score, min_extracted_entity_score)

    # Path 2: structural completeness fallback
    filled = sum(
        1
        for item in comparison_items
        if SaveHandler._is_filled_value(item.Extracted)
    )
    ratio = round(filled / item_count, 3)
    return (ratio, ratio, ratio)
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from azure.identity import (
AzureCliCredential,
AzureDeveloperCliCredential,
DefaultAzureCredential,
ManagedIdentityCredential,
)
from azure.identity import (
Expand Down Expand Up @@ -130,7 +129,11 @@ def get_azure_credential():
logging.info(
"[AUTH] All CLI credentials failed - falling back to DefaultAzureCredential"
)
return DefaultAzureCredential()
raise RuntimeError(
"No Azure authentication available. "
"Use Managed Identity in Azure or run "
"'az login' / 'azd auth login' locally."
)
Comment thread
Vamshi-Microsoft marked this conversation as resolved.


def get_async_azure_credential():
Expand Down
7 changes: 5 additions & 2 deletions src/ContentProcessor/src/libs/utils/credential_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from azure.identity import (
AzureCliCredential,
AzureDeveloperCliCredential,
DefaultAzureCredential,
ManagedIdentityCredential,
)
from azure.identity import (
Expand Down Expand Up @@ -130,7 +129,11 @@ def get_azure_credential():
logging.info(
"[AUTH] All CLI credentials failed - falling back to DefaultAzureCredential"
)
return DefaultAzureCredential()
raise RuntimeError(
"No Azure authentication available. "
"Use Managed Identity in Azure or run "
"'az login' / 'azd auth login' locally."
)
Comment thread
Vamshi-Microsoft marked this conversation as resolved.


def get_async_azure_credential():
Expand Down
Loading
Loading