From ca398bdb24db3430e72f53847c7dcf1562be94b3 Mon Sep 17 00:00:00 2001
From: Gallei
Date: Tue, 22 Aug 2023 10:45:52 +0200
Subject: [PATCH] Determine information about merged cells

---
 .../org/dhatim/fastexcel/reader/Cell.java | 15 +++-
 .../reader/MergeCellSpliterator.java | 80 +++++++++++++++++++
 .../fastexcel/reader/ReadableWorkbook.java | 9 ++-
 .../fastexcel/reader/RowSpliterator.java | 44 ++++++----
 .../fastexcel/reader/MergeCellTest.java | 70 ++++++++++++++++
 5 files changed, 198 insertions(+), 20 deletions(-)
 create mode 100644 fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/MergeCellSpliterator.java
 create mode 100644 fastexcel-reader/src/test/java/org/dhatim/fastexcel/reader/MergeCellTest.java

diff --git a/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/Cell.java b/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/Cell.java
index c21b7239..3c5fc1ed 100644
--- a/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/Cell.java
+++ b/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/Cell.java
@@ -32,19 +32,21 @@ public class Cell {
     private final String rawValue;
     private final String dataFormatId;
     private final String dataFormatString;
+    private final CellAddress mergedCellAddress; // first row/column of the merged range containing this cell (set by RowSpliterator.parseCell); null when the cell is not merged
 
-    Cell(ReadableWorkbook workbook, CellType type, Object value, CellAddress address, String formula, String rawValue) {
-        this(workbook, type, value, address, formula, rawValue, null, null);
+    Cell(ReadableWorkbook workbook, CellType type, Object value, CellAddress address, String formula, String rawValue, CellAddress mergedCellAddress) { // convenience overload: no data format id/string
+        this(workbook, type, value, address, formula, rawValue, mergedCellAddress, null, null);
     }
 
     Cell(ReadableWorkbook workbook, CellType type, Object value, CellAddress address, String formula, String rawValue,
-            String dataFormatId, String dataFormatString) {
+            CellAddress mergedCellAddress, String dataFormatId, String dataFormatString) {
         this.workbook = workbook;
         this.type = type;
         this.value = value;
         this.address = address;
         this.formula = formula;
         this.rawValue = rawValue;
+        this.mergedCellAddress = mergedCellAddress; // may be null (cell outside every merged range)
         this.dataFormatId = dataFormatId;
         this.dataFormatString = dataFormatString;
     }
@@ -170,4 +172,11 @@ public String toString() {
         return sb.append(']').toString();
     }
 
+    public boolean isMerged() { // true when a merged-range address was supplied at construction
+        return mergedCellAddress != null;
+    }
+
+    public CellAddress getMergedCellAddress() { // returns null when isMerged() is false
+        return mergedCellAddress;
+    }
 }
diff --git a/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/MergeCellSpliterator.java b/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/MergeCellSpliterator.java
new file mode 100644
index 00000000..3e6d135b
--- /dev/null
+++ b/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/MergeCellSpliterator.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2016 Dhatim.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.dhatim.fastexcel.reader;
+
+import static org.dhatim.fastexcel.reader.DefaultXMLInputFactory.factory;
+
+import java.io.InputStream;
+import java.util.NoSuchElementException;
+import java.util.Spliterator;
+import java.util.function.Consumer;
+import javax.xml.stream.XMLStreamException;
+
+class MergeCellSpliterator implements Spliterator { // yields one CellRangeAddress per mergeCell element read from a sheet's XML stream
+
+    private final SimpleXmlReader r; // XML cursor over the sheet content; advanced by hasNext()
+
+    public MergeCellSpliterator(InputStream inputStream) throws XMLStreamException { // wraps inputStream in a SimpleXmlReader built with the shared factory
+        this.r = new SimpleXmlReader(factory, inputStream);
+    }
+
+    @Override
+    public boolean tryAdvance(Consumer action) { // passes the next merged range to action; false once no further mergeCell is found
+        try {
+            if (hasNext()) {
+                action.accept(next());
+                return true;
+            } else {
+                return false;
+            }
+        } catch (XMLStreamException e) { // rethrown as ExcelReaderException because tryAdvance declares no checked exception
+            throw new ExcelReaderException(e);
+        }
+    }
+
+    @Override
+    public Spliterator trySplit() {
+        return null; // splitting is not supported
+    }
+
+    @Override
+    public long estimateSize() {
+        return Long.MAX_VALUE; // element count is not known in advance
+    }
+
+    @Override
+    public int characteristics() {
+        return DISTINCT | IMMUTABLE | NONNULL | ORDERED;
+    }
+
+    private boolean hasNext() throws XMLStreamException { // moves the reader to the next mergeCell start tag or the mergeCells end tag
+        if (r.goTo(() -> r.isStartElement("mergeCell") || r.isEndElement("mergeCells"))) {
+            return "mergeCell".equals(r.getLocalName()); // true only when stopped on a mergeCell, not on the closing mergeCells tag
+        } else {
+            return false; // goTo found no match - presumably end of document; confirm in SimpleXmlReader
+        }
+    }
+
+
+    private CellRangeAddress next() { // parses the current mergeCell's "ref" attribute into a CellRangeAddress
+        if (!"mergeCell".equals(r.getLocalName())) {
+            throw new NoSuchElementException(); // reader is not positioned on a mergeCell element
+        }
+
+        String ref = r.getAttribute("ref");
+        return CellRangeAddress.valueOf(ref);
+    }
+}
diff --git a/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/ReadableWorkbook.java b/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/ReadableWorkbook.java
index a02099b2..f5080569 100644
--- a/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/ReadableWorkbook.java
+++ b/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/ReadableWorkbook.java
@@ -15,6 +15,7 @@
  */
 package org.dhatim.fastexcel.reader;
 
+import java.util.stream.Collectors;
 import javax.xml.stream.XMLStreamException;
 import 
java.io.*;
 import java.util.ArrayList;
@@ -145,7 +146,21 @@ private void createSheet(SimpleXmlReader r) {
     Stream openStream(Sheet sheet) throws IOException {
         try {
             InputStream inputStream = pkg.getSheetContent(sheet);
-            Stream stream = StreamSupport.stream(new RowSpliterator(this, inputStream), false);
+
+            // Pass 1: read the sheet's merged ranges. Stream.collect() never runs onClose handlers,
+            // so the first InputStream is closed explicitly here (also when parsing fails).
+            List<CellRangeAddress> mergedCells;
+            try {
+                Stream<CellRangeAddress> mergedCellStream = StreamSupport.stream(new MergeCellSpliterator(inputStream), false);
+                mergedCells = mergedCellStream.collect(Collectors.toList());
+            } finally {
+                inputStream.close();
+            }
+
+            // Pass 2: reopen the sheet content; this stream is closed by the returned Stream's onClose handler.
+            inputStream = pkg.getSheetContent(sheet);
+
+            Stream stream = StreamSupport.stream(new RowSpliterator(this, mergedCells, inputStream), false);
             return stream.onClose(asUncheckedRunnable(inputStream));
         } catch (XMLStreamException e) {
             throw new IOException(e);
diff --git a/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/RowSpliterator.java b/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/RowSpliterator.java
index b7fcd04b..685a2934 100644
--- a/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/RowSpliterator.java
+++ b/fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/RowSpliterator.java
@@ -31,10 +31,13 @@ class RowSpliterator implements Spliterator {
 
     private final HashMap sharedFormula = new HashMap<>();
     private final HashMap arrayFormula = new HashMap<>();
+
+    private final List mergedCells;
     private int rowCapacity = 16;
 
-    public RowSpliterator(ReadableWorkbook workbook, InputStream inputStream) throws XMLStreamException {
+    public RowSpliterator(ReadableWorkbook workbook, List mergedCells, InputStream inputStream) throws XMLStreamException {
         this.workbook = workbook;
+        this.mergedCells = mergedCells;
         this.r = new SimpleXmlReader(factory, inputStream);
 
         r.goTo("sheetData");
@@ -105,6 +108,16 @@ private Row next() throws XMLStreamException {
     private Cell parseCell() throws XMLStreamException {
         String cellRef = r.getAttribute("r");
         CellAddress addr = new CellAddress(cellRef);
+
+        CellAddress mergedCellAddress = null;
+
+ 
for(CellRangeAddress mergedCell : mergedCells) { // linear scan of the sheet's merged ranges; the first range containing addr wins
+            if (mergedCell.isInRange(addr.getRow(), addr.getColumn())) {
+                mergedCellAddress = new CellAddress(mergedCell.getFirstRow(), mergedCell.getFirstColumn()); // anchor = first row / first column of the matching range
+                break;
+            }
+        }
+
         String type = r.getOptionalAttribute("t").orElse("n");
         String styleString = r.getAttribute("s");
         String formatId = null;
@@ -118,15 +131,15 @@ private Cell parseCell() throws XMLStreamException {
         }
 
         if ("inlineStr".equals(type)) {
-            return parseInlineStr(addr);
+            return parseInlineStr(addr, mergedCellAddress);
         } else if ("s".equals(type)) {
-            return parseString(addr);
+            return parseString(addr, mergedCellAddress);
         } else {
-            return parseOther(addr, type, formatId, formatString);
+            return parseOther(addr, type, formatId, formatString, mergedCellAddress);
         }
     }
 
-    private Cell parseOther(CellAddress addr, String type, String dataFormatId, String dataFormatString)
+    private Cell parseOther(CellAddress addr, String type, String dataFormatId, String dataFormatString, CellAddress mergedCellAddress)
             throws XMLStreamException {
         CellType definedType = parseType(type);
         Function parser = getParserForType(definedType);
@@ -174,10 +187,10 @@ private Cell parseOther(CellAddress addr, String type, String dataFormatId, Stri
         }
 
         if (formula == null && value == null && definedType == CellType.NUMBER) {
-            return new Cell(workbook, CellType.EMPTY, null, addr, null, rawValue);
+            return new Cell(workbook, CellType.EMPTY, null, addr, null, rawValue, mergedCellAddress); // EMPTY cells still carry their merged-range anchor
         } else {
             CellType cellType = (formula != null) ? CellType.FORMULA : definedType;
-            return new Cell(workbook, cellType, value, addr, formula, rawValue, dataFormatId, dataFormatString);
+            return new Cell(workbook, cellType, value, addr, formula, rawValue, mergedCellAddress, dataFormatId, dataFormatString);
         }
     }
 
@@ -263,28 +276,28 @@ private String shiftCell(String cellID, Integer dCol, Integer dRow) {
     }
 
 
-    private Cell parseString(CellAddress addr) throws XMLStreamException {
+    private Cell parseString(CellAddress addr, CellAddress mergedCellAddress) throws XMLStreamException {
         r.goTo(() -> r.isStartElement("v") || r.isEndElement("c"));
         if (r.isEndElement("c")) {
-            return empty(addr, CellType.STRING);
+            return empty(addr, CellType.STRING, mergedCellAddress);
         }
         String v = r.getValueUntilEndElement("v");
         if (v.isEmpty()) {
-            return empty(addr, CellType.STRING);
+            return empty(addr, CellType.STRING, mergedCellAddress);
         }
         int index = Integer.parseInt(v);
         String sharedStringValue = workbook.getSharedStringsTable().getItemAt(index);
         Object value = sharedStringValue;
         String formula = null;
         String rawValue = sharedStringValue;
-        return new Cell(workbook, CellType.STRING, value, addr, formula, rawValue);
+        return new Cell(workbook, CellType.STRING, value, addr, formula, rawValue, mergedCellAddress);
     }
 
-    private Cell empty(CellAddress addr, CellType type) {
-        return new Cell(workbook, type, "", addr, null, "");
+    private Cell empty(CellAddress addr, CellType type, CellAddress mergedCellAddress) { // cell with empty value and raw value that keeps the merged-range address
+        return new Cell(workbook, type, "", addr, null, "", mergedCellAddress);
     }
 
-    private Cell parseInlineStr(CellAddress addr) throws XMLStreamException {
+    private Cell parseInlineStr(CellAddress addr, CellAddress mergedCellAddress) throws XMLStreamException {
         Object value = null;
         String formula = null;
         String rawValue = null;
@@ -299,7 +312,7 @@ private Cell parseInlineStr(CellAddress addr) throws XMLStreamException {
             }
         }
         CellType cellType = formula == null ? CellType.STRING : CellType.FORMULA;
-        return new Cell(workbook, cellType, value, addr, formula, rawValue);
+        return new Cell(workbook, cellType, value, addr, formula, rawValue, mergedCellAddress);
     }
 
     private Optional getArrayFormula(CellAddress addr) {
@@ -368,5 +381,4 @@ private static void ensureSize(List list, int newSize) {
             list.add(null);
         }
     }
-
 }
diff --git a/fastexcel-reader/src/test/java/org/dhatim/fastexcel/reader/MergeCellTest.java b/fastexcel-reader/src/test/java/org/dhatim/fastexcel/reader/MergeCellTest.java
new file mode 100644
index 00000000..72ac1c9b
--- /dev/null
+++ b/fastexcel-reader/src/test/java/org/dhatim/fastexcel/reader/MergeCellTest.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2016 Dhatim.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.dhatim.fastexcel.reader;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Stream;
+
+class MergeCellTest { // checks Cell.isMerged() / getMergedCellAddress() against the /xlsx/merge_cells.xlsx fixture
+    @org.junit.jupiter.api.Test
+    void test() throws IOException {
+        try (InputStream is = Resources.open("/xlsx/merge_cells.xlsx"); ReadableWorkbook wb = new ReadableWorkbook(is)) {
+            Sheet sheet = wb.getFirstSheet();
+            Map rowMap = new HashMap<>(); // rows keyed by Row.getRowNum()
+            try (Stream rows = sheet.openStream()) {
+                rows.forEach(r -> rowMap.put(r.getRowNum(), r));
+            }
+
+            Row r1 = rowMap.get(1); // expected anchors: columns 0-1 -> (row 0, col 0), column 2 -> (row 0, col 2)
+            assertTrue(r1.getCell(0).isMerged());
+            assertEquals(0, r1.getCell(0).getMergedCellAddress().getRow());
+            assertEquals(0, r1.getCell(0).getMergedCellAddress().getColumn());
+            assertTrue(r1.getCell(1).isMerged());
+            assertEquals(0, r1.getCell(1).getMergedCellAddress().getRow());
+            assertEquals(0, r1.getCell(1).getMergedCellAddress().getColumn());
+            assertTrue(r1.getCell(2).isMerged());
+            assertEquals(0, r1.getCell(2).getMergedCellAddress().getRow());
+            assertEquals(2, r1.getCell(2).getMergedCellAddress().getColumn());
+            Row r2 = rowMap.get(2); // same anchors as row 1 - presumably the fixture merges A1:B2 and C1:C2; confirm against the xlsx
+            assertTrue(r2.getCell(0).isMerged());
+            assertEquals(0, r2.getCell(0).getMergedCellAddress().getRow());
+            assertEquals(0, r2.getCell(0).getMergedCellAddress().getColumn());
+            assertTrue(r2.getCell(1).isMerged());
+            assertEquals(0, r2.getCell(1).getMergedCellAddress().getRow());
+            assertEquals(0, r2.getCell(1).getMergedCellAddress().getColumn());
+            assertTrue(r2.getCell(2).isMerged());
+            assertEquals(0, r2.getCell(2).getMergedCellAddress().getRow());
+            assertEquals(2, r2.getCell(2).getMergedCellAddress().getColumn());
+            Row r3 = rowMap.get(3); // columns 0-1 anchored at (row 2, col 0); column 2 is expected to be unmerged
+            assertTrue(r3.getCell(0).isMerged());
+            assertEquals(2, r3.getCell(0).getMergedCellAddress().getRow());
+            assertEquals(0, r3.getCell(0).getMergedCellAddress().getColumn());
+            assertTrue(r3.getCell(1).isMerged());
+            assertEquals(2, r3.getCell(1).getMergedCellAddress().getRow());
+            assertEquals(0, r3.getCell(1).getMergedCellAddress().getColumn());
+            assertFalse(r3.getCell(2).isMerged());
+            assertNull(r3.getCell(2).getMergedCellAddress()); // unmerged cells report a null merged address
+        }
+    }
+}