Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { NotebookPair } from '../../types/common';

/**
 * Checks that the source and codex notebooks of a pair contain the same
 * number of cells, both overall and per category.
 *
 * Every cell is placed in exactly one category:
 * - paratext: `metadata.type === 'paratext'`
 * - child: `metadata.isChild === true`, or an id with more than two
 *   ':'-separated segments
 * - topLevel: everything else
 *
 * @param pair - Notebook pair whose `source` and `codex` cells are compared.
 * @throws Error listing source/codex counts for each category and the total
 *   when any of them differ.
 */
export function assertMatchingCellCounts(pair: NotebookPair): void {
  const tally = (notebook: typeof pair.source) => {
    const totals = { topLevel: 0, paratext: 0, child: 0 };
    for (const cell of notebook.cells) {
      // Both flags are evaluated for every cell; paratext wins over child.
      const paratextCell = cell.metadata?.type === 'paratext';
      const childCell = cell.metadata?.isChild === true || cell.id.split(':').length > 2;
      const bucket = paratextCell ? 'paratext' : childCell ? 'child' : 'topLevel';
      totals[bucket] += 1;
    }
    return totals;
  };

  const sourceTally = tally(pair.source);
  const codexTally = tally(pair.codex);

  const consistent =
    sourceTally.topLevel === codexTally.topLevel &&
    sourceTally.paratext === codexTally.paratext &&
    sourceTally.child === codexTally.child &&
    pair.source.cells.length === pair.codex.cells.length;
  if (consistent) {
    return;
  }

  throw new Error(`Cell count mismatch: topLevel ${sourceTally.topLevel}/${codexTally.topLevel}, paratext ${sourceTally.paratext}/${codexTally.paratext}, child ${sourceTally.child}/${codexTally.child}, total ${pair.source.cells.length}/${pair.codex.cells.length}`);
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { describe, it, expect } from 'vitest';
import { subtitlesImporter } from '../subtitles/index';
import { assertMatchingCellCounts } from './cellCountUtils';

/**
 * Minimal in-memory stand-in for the DOM `File` type, used to feed text
 * fixtures to an importer in tests.
 *
 * Only `text()` and the metadata fields are functional; `arrayBuffer()`,
 * `slice()` and `stream()` throw 'Not implemented'.
 */
class MockFile {
  readonly lastModified: number;
  readonly name: string;
  readonly size: number;
  readonly type: string;
  readonly webkitRelativePath: string = '';
  private readonly _content: string;
  arrayBuffer(): Promise<ArrayBuffer> { throw new Error('Not implemented'); }
  slice(): Blob { throw new Error('Not implemented'); }
  stream(): ReadableStream<any> { throw new Error('Not implemented'); }
  /**
   * @param content - Text returned by `text()`.
   * @param name - File name reported by the mock.
   * @param type - MIME type reported by the mock; defaults to 'text/vtt'.
   */
  constructor(content: string, name: string, type = 'text/vtt') {
    this._content = content;
    this.name = name;
    this.type = type;
    // File.size is a byte count, not a UTF-16 code-unit count, so measure the
    // UTF-8 encoding; the two differ as soon as the fixture has non-ASCII text.
    this.size = new TextEncoder().encode(content).byteLength;
    this.lastModified = Date.now();
  }
  /** Resolves with the content string passed to the constructor. */
  text(): Promise<string> { return Promise.resolve(this._content); }
  // Makes Object.prototype.toString report '[object File]'.
  [Symbol.toStringTag]: string = 'File';
}

describe('Subtitles Importer - Cell Count Consistency', () => {
  // Parsing the same VTT input twice must give source/codex notebooks with
  // matching cell counts, the same source length, and the same first cell id.
  it('should produce matching cell counts when same file imported twice', async () => {
    const vtt = `WEBVTT

00:00:30.697 --> 00:00:31.990
The first time?

00:00:32.783 --> 00:00:34.785
You know the first time.`;

    const input = new MockFile(vtt, 'test.vtt') as unknown as File;
    const firstImport = await subtitlesImporter.parseFile(input);
    const secondImport = await subtitlesImporter.parseFile(input);

    const bothSucceeded = firstImport.success && secondImport.success;
    expect(bothSucceeded).toBe(true);

    const firstPair = firstImport.notebookPair!;
    const secondPair = secondImport.notebookPair!;
    assertMatchingCellCounts(firstPair);
    assertMatchingCellCounts(secondPair);

    const firstCells = firstPair.source.cells;
    const secondCells = secondPair.source.cells;
    expect(firstCells.length).toBe(secondCells.length);
    expect(firstCells[0].id).toBe(secondCells[0].id);
  });
});

Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { describe, it, expect } from 'vitest';
import { parseFile } from '../usfm/index';
import { assertMatchingCellCounts } from './cellCountUtils';

/**
 * Minimal in-memory stand-in for the DOM `File` type, used to feed text
 * fixtures to an importer in tests.
 *
 * Only `text()` and the metadata fields are functional; `arrayBuffer()`,
 * `slice()` and `stream()` throw 'Not implemented'.
 */
class MockFile {
  readonly lastModified: number;
  readonly name: string;
  readonly size: number;
  readonly type: string;
  readonly webkitRelativePath: string = '';
  private readonly _content: string;
  arrayBuffer(): Promise<ArrayBuffer> { throw new Error('Not implemented'); }
  slice(): Blob { throw new Error('Not implemented'); }
  stream(): ReadableStream<any> { throw new Error('Not implemented'); }
  /**
   * @param content - Text returned by `text()`.
   * @param name - File name reported by the mock.
   * @param type - MIME type reported by the mock; defaults to 'text/plain'.
   */
  constructor(content: string, name: string, type = 'text/plain') {
    this._content = content;
    this.name = name;
    this.type = type;
    // File.size is a byte count, not a UTF-16 code-unit count, so measure the
    // UTF-8 encoding; the two differ as soon as the fixture has non-ASCII text.
    this.size = new TextEncoder().encode(content).byteLength;
    this.lastModified = Date.now();
  }
  /** Resolves with the content string passed to the constructor. */
  text(): Promise<string> { return Promise.resolve(this._content); }
  // Makes Object.prototype.toString report '[object File]'.
  [Symbol.toStringTag]: string = 'File';
}

describe('USFM Importer - Cell Count Consistency', () => {
  // Parsing the same USFM input twice must give source/codex notebooks with
  // matching cell counts and the same number of source cells.
  it('should produce matching cell counts when same file imported twice', async () => {
    const usfm = `\\id GEN
\\c 1
\\v 1 In the beginning God created the heavens and the earth.
\\p
\\v 2 Now the earth was formless and empty.`;

    const input = new MockFile(usfm, 'GEN.usfm') as unknown as File;
    const firstImport = await parseFile(input);
    const secondImport = await parseFile(input);

    const bothSucceeded = firstImport.success && secondImport.success;
    expect(bothSucceeded).toBe(true);

    const firstPair = firstImport.notebookPair!;
    const secondPair = secondImport.notebookPair!;
    assertMatchingCellCounts(firstPair);
    assertMatchingCellCounts(secondPair);

    expect(firstPair.source.cells.length).toBe(secondPair.source.cells.length);
  });
});

Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,19 @@ import { validateFootnotes } from '../../utils/footnoteUtils';
/**
 * Deprecated: the dynamic import of usfm-grammar was replaced by a
 * lightweight regex parser. The function body is empty and the returned
 * promise resolves with no value.
 */
export const initializeUsfmGrammar = async (): Promise<void> => {};

/**
 * Derives a short, deterministic base-36 token from a string, used to build
 * repeatable IDs.
 *
 * Folds each UTF-16 code unit into a signed 32-bit accumulator
 * (`acc * 31 + codeUnit`, wrapping on overflow), then returns the absolute
 * value rendered in base 36, truncated to at most 8 characters.
 *
 * @param str - Text to hash; the empty string yields '0'.
 * @returns Lower-case base-36 digest of at most 8 characters.
 */
const simpleHash = (str: string): string => {
  let acc = 0;
  for (let idx = 0; idx < str.length; idx += 1) {
    // Math.imul keeps the multiply in 32-bit space; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + str.charCodeAt(idx)) | 0;
  }
  const magnitude = acc < 0 ? -acc : acc;
  return magnitude.toString(36).slice(0, 8);
};

export interface UsfmContent {
id: string;
content: string;
Expand Down Expand Up @@ -132,6 +145,7 @@ export const processUsfmContent = async (
chapters.add(chapterNumber);

let seenFirstVerseInChapter = false;
let paratextIndex = 0;
chapter.contents.forEach((content: any) => {
if (content.verseNumber !== undefined && content.verseText !== undefined) {
// This is a verse - process it for footnotes
Expand All @@ -158,6 +172,7 @@ export const processUsfmContent = async (
});

// Create child cells for milestone spans (e.g., qt, ts)
let childIndex = 0;
try {
const milestoneTags = new Set(['qt', 'ts']);
const parser = new DOMParser();
Expand All @@ -169,8 +184,9 @@ export const processUsfmContent = async (
const el = node as HTMLElement;
const tag = el.getAttribute('data-tag');
if (tag && milestoneTags.has(tag)) {
const childId = `${verseId}:${Math.random().toString(36).slice(2, 11)}`;
const innerHtml = el.innerHTML;
const childId = `${verseId}:child-${childIndex}`;
childIndex++;
usfmContent.push({
id: childId,
content: innerHtml,
Expand All @@ -193,15 +209,17 @@ export const processUsfmContent = async (
};
Array.from(container.childNodes).forEach(walker);
}
} catch {
// ignore child extraction errors
} catch (error) {
console.warn('Error creating child cells:', error);
}

seenFirstVerseInChapter = true;
} else if (content.text && !content.marker) {
// This is paratext (content without specific markers)
const paratextId = `${bookCode} ${chapterNumber}:${Math.random().toString(36).slice(2, 10)}`;
const paratextContent = content.text.trim();
const contentHash = simpleHash(paratextContent);
const paratextId = `${bookCode} ${chapterNumber}:paratext-${paratextIndex}-${contentHash}`;
paratextIndex++;
const htmlParatext = paratextContent.length > 0 ? `<p data-tag="p">${convertUsfmInlineMarkersToHtml(paratextContent)}</p>` : paratextContent;

// Convert USFM to HTML with footnotes if needed
Expand Down Expand Up @@ -234,8 +252,10 @@ export const processUsfmContent = async (
});
} else if (content.marker) {
// Preserve raw marker lines as paratext for round-trip fidelity
const paratextId = `${bookCode} ${chapterNumber}:${Math.random().toString(36).slice(2, 10)}`;
const markerLine = String(content.marker).trim();
const contentHash = simpleHash(markerLine);
const paratextId = `${bookCode} ${chapterNumber}:paratext-${paratextIndex}-${contentHash}`;
paratextIndex++;
const htmlBlock = usfmBlockToHtml(markerLine);
// Determine if style-only (no inner text)
let cellType: UsfmContent['type'] = 'text';
Expand Down Expand Up @@ -281,6 +301,8 @@ export const processUsfmContent = async (
cellLabel: item.metadata.verse !== undefined ? item.metadata.verse?.toString() : undefined,
originalText: item.metadata.originalText,
fileName: item.metadata.fileName,
isChild: item.metadata.isChild,
parentId: item.metadata.parentId,
});
});

Expand Down Expand Up @@ -381,7 +403,9 @@ export const createNotebookPair = (
id: sourceCell.id,
content: isStyleCell ? sourceCell.content : '',
images: sourceCell.images,
metadata: sourceCell.metadata,
metadata: {
...sourceCell.metadata,
},
};
});

Expand Down
Loading