Source code for syelink.parser

"""ASC file parser for EyeLink eye tracking data.

Extracts calibrations, validations, and recordings from ASC files.
"""

from __future__ import annotations

import re
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import Any
# Note: Path is used at runtime so it stays at top-level


[docs] def find_all_segments(asc_file: str | Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]: """Extract complete calibration, validation, and recording blocks from an ASC file. Args: asc_file: Path to the ASC file Returns: Tuple of (calibrations, validations, recordings) where each is a list of dicts with 'timestamp' and 'text' keys (recordings also have 'start' and 'end'). """ calibrations: list[dict[str, Any]] = [] validations: list[dict[str, Any]] = [] recordings: list[dict[str, Any]] = [] current_cal_block: dict[str, Any] | None = None current_val_block: dict[str, Any] | None = None current_rec_block: dict[str, Any] | None = None with Path(asc_file).open(encoding="utf-8") as f: for line in f: # CALIBRATION START if ">>>>>>> CALIBRATION" in line and "FOR" in line: # If we already have a block with results, this is a new calibration if current_cal_block is not None and current_cal_block.get("has_results", False): calibrations.append({ "timestamp": current_cal_block["timestamp"], "text": "".join(current_cal_block["lines"]), }) current_cal_block = None if current_cal_block is None: current_cal_block = {"lines": [], "timestamp": None, "has_results": False} current_cal_block["lines"].append(line) continue # CALIBRATION RESULT cal_result = re.match(r"MSG\s+(\d+)\s+!CAL CALIBRATION ([A-Z0-9]+) ([\w-]+) (LEFT|RIGHT)\s+(\w+)", line) if cal_result and current_cal_block is not None: current_cal_block["lines"].append(line) current_cal_block["has_results"] = True if current_cal_block["timestamp"] is None: current_cal_block["timestamp"] = int(cal_result.group(1)) continue # INPUT after calibration closes the calibration block if line.startswith("INPUT") and current_cal_block is not None: current_cal_block["lines"].append(line) calibrations.append({ "timestamp": current_cal_block["timestamp"], "text": "".join(current_cal_block["lines"]), }) current_cal_block = None continue # If we're in a calibration block, collect MSG lines and continuation lines (not gaze samples) if current_cal_block is not None: # Skip gaze sample data (lines starting with a digit timestamp) if line and line[0].isdigit(): continue # Collect MSG lines and their continuation lines (indented with whitespace) if line.startswith("MSG") or (line and line[0].isspace()): current_cal_block["lines"].append(line) time_match = re.match(r"MSG\s+(\d+)", line) if time_match and current_cal_block["timestamp"] is None: current_cal_block["timestamp"] = int(time_match.group(1)) continue # VALIDATION START val_start = re.match(r"MSG\s+(\d+)\s+!CAL VALIDATION ([A-Z0-9]+) ([\w-]+) (LEFT|RIGHT)\s+(\w+)", line) if val_start: if current_val_block is None: current_val_block = {"timestamp": int(val_start.group(1)), "lines": [line]} else: current_val_block["lines"].append(line) continue # Collect validation-related MSG lines if ( current_val_block is not None and line.startswith("MSG") and ("VALIDATE" in line or "!CAL VALIDATION" in line) ): current_val_block["lines"].append(line) continue # Close validation block on meaningful boundaries (not gaze sample data) if current_val_block is not None and ( line.startswith(("START", "INPUT")) or ">>>>>>> CALIBRATION" in line ): validations.append({ "timestamp": current_val_block["timestamp"], "text": "".join(current_val_block["lines"]), }) current_val_block = None # RECORDING START if line.startswith("START"): start_match = re.match(r"START\s+(\d+)", line) if start_match: current_rec_block = {"start": int(start_match.group(1)), "end": None, "lines": [line]} continue # If we're in a recording block, collect all lines if current_rec_block is not None: current_rec_block["lines"].append(line) if line.startswith("END"): end_match = re.match(r"END\s+(\d+)", line) if end_match: current_rec_block["end"] = int(end_match.group(1)) recordings.append({ "start": current_rec_block["start"], "end": current_rec_block["end"], "text": "".join(current_rec_block["lines"]), }) current_rec_block = None continue # Close any unclosed validation block at end of file if current_val_block is not None: validations.append({ "timestamp": current_val_block["timestamp"], "text": "".join(current_val_block["lines"]), }) return calibrations, validations, recordings