テキストを走査する
備忘録も兼ねてPowerPointプレゼンテーション内のテキストを走査するためのクラスを作成しました。
走査する単位はCharacter, Run, Word, Line, Sentence, Paragraphから選択できます。
using PowerPoint = Microsoft.Office.Interop.PowerPoint;
using Office = Microsoft.Office.Core;

namespace TorasenLib
{
    /// <summary>
    /// Walks the text held by PowerPoint objects (presentations, slides, shapes,
    /// table cells and grouped shapes) and invokes a caller-supplied callback once
    /// for every text unit found.
    /// </summary>
    public class TextTraverser
    {
        /// <summary>
        /// Selects which <c>TextRange2</c> accessor is used to split a text range
        /// into the units handed to the callback.
        /// </summary>
        public enum TraverseUnit
        {
            /// <summary>Split with <c>TextRange2.get_Characters</c>.</summary>
            Character,

            /// <summary>Split with <c>TextRange2.get_Runs</c>.</summary>
            Run,

            /// <summary>Split with <c>TextRange2.get_Words</c>.</summary>
            Word,

            /// <summary>Split with <c>TextRange2.get_Lines</c>.</summary>
            Line,

            /// <summary>
            /// Split with <c>TextRange2.get_Sentences</c>. Misspelled name kept so
            /// that existing callers keep compiling; prefer <see cref="Sentence"/>.
            /// </summary>
            Sensence,

            /// <summary>
            /// Correctly spelled alias of <see cref="Sensence"/> (same numeric value).
            /// </summary>
            Sentence = Sensence,

            /// <summary>Split with <c>TextRange2.get_Paragraphs</c>.</summary>
            Paragraph
        }

        /// <summary>
        /// Callback invoked once per visited text unit. Despite the name it is used
        /// for every <see cref="TraverseUnit"/>, not only for runs.
        /// </summary>
        /// <param name="unit">The text unit currently being visited.</param>
        public delegate void ForEachRun(Office.TextRange2 unit);

        // Returns the sub-range of 'textrange' selected by (start, length) for one unit kind.
        private delegate Office.TextRange2 Decompose(Office.TextRange2 textrange, int start, int length);

        private readonly ForEachRun procedure_;
        private readonly Decompose decompose_;

        /// <summary>
        /// Creates a traverser that calls <paramref name="procedure"/> for each text
        /// unit of kind <paramref name="unit"/>.
        /// </summary>
        /// <param name="procedure">Callback to run for every unit; must not be null.</param>
        /// <param name="unit">Granularity used to split text ranges.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="procedure"/> is null.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="unit"/> is not a defined <see cref="TraverseUnit"/> value.
        /// </exception>
        public TextTraverser(ForEachRun procedure, TraverseUnit unit)
        {
            if (procedure == null)
            {
                throw new System.ArgumentNullException("procedure");
            }

            procedure_ = procedure;

            switch (unit)
            {
                case TraverseUnit.Character:
                    decompose_ = new Decompose(GetCharacters_);
                    break;
                case TraverseUnit.Run:
                    decompose_ = new Decompose(GetRuns_);
                    break;
                case TraverseUnit.Word:
                    decompose_ = new Decompose(GetWords_);
                    break;
                case TraverseUnit.Line:
                    decompose_ = new Decompose(GetLines_);
                    break;
                case TraverseUnit.Sentence: // same value as the legacy Sensence member
                    decompose_ = new Decompose(GetSentences_);
                    break;
                case TraverseUnit.Paragraph:
                    decompose_ = new Decompose(GetParagraphs_);
                    break;
                default:
                    // Debug.Assert is compiled out of release builds, which used to leave
                    // decompose_ null and defer the failure to the first Traverse call.
                    // Fail immediately and visibly instead.
                    throw new System.ArgumentOutOfRangeException("unit", unit, "Unsupported traverse unit.");
            }
        }

        /// <summary>Traverses every presentation in the collection.</summary>
        /// <param name="presentations">Presentations to traverse; must not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="presentations"/> is null.</exception>
        public void Traverse(PowerPoint.Presentations presentations)
        {
            if (presentations == null)
            {
                throw new System.ArgumentNullException("presentations");
            }

            foreach (PowerPoint.Presentation presentation in presentations)
            {
                Traverse(presentation);
            }
        }

        /// <summary>Traverses all slides of a presentation.</summary>
        /// <param name="presentation">Presentation to traverse; must not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="presentation"/> is null.</exception>
        public void Traverse(PowerPoint.Presentation presentation)
        {
            if (presentation == null)
            {
                throw new System.ArgumentNullException("presentation");
            }

            Traverse(presentation.Slides);
        }

        /// <summary>Traverses every slide in the collection.</summary>
        /// <param name="slides">Slides to traverse; must not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="slides"/> is null.</exception>
        public void Traverse(PowerPoint.Slides slides)
        {
            if (slides == null)
            {
                throw new System.ArgumentNullException("slides");
            }

            foreach (PowerPoint.Slide slide in slides)
            {
                Traverse(slide);
            }
        }

        /// <summary>Traverses every slide in the slide range.</summary>
        /// <param name="sliderange">Slide range to traverse; must not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="sliderange"/> is null.</exception>
        public void Traverse(PowerPoint.SlideRange sliderange)
        {
            if (sliderange == null)
            {
                throw new System.ArgumentNullException("sliderange");
            }

            foreach (PowerPoint.Slide slide in sliderange)
            {
                Traverse(slide);
            }
        }

        /// <summary>Traverses all shapes of a slide.</summary>
        /// <param name="slide">Slide to traverse; must not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="slide"/> is null.</exception>
        public void Traverse(PowerPoint.Slide slide)
        {
            if (slide == null)
            {
                throw new System.ArgumentNullException("slide");
            }

            Traverse(slide.Shapes);
        }

        /// <summary>Traverses every shape in the collection.</summary>
        /// <param name="shapes">Shapes to traverse; must not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="shapes"/> is null.</exception>
        public void Traverse(PowerPoint.Shapes shapes)
        {
            if (shapes == null)
            {
                throw new System.ArgumentNullException("shapes");
            }

            foreach (PowerPoint.Shape shape in shapes)
            {
                Traverse(shape);
            }
        }

        /// <summary>Traverses every shape in the shape range.</summary>
        /// <param name="shaperange">Shape range to traverse; must not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="shaperange"/> is null.</exception>
        public void Traverse(PowerPoint.ShapeRange shaperange)
        {
            if (shaperange == null)
            {
                throw new System.ArgumentNullException("shaperange");
            }

            foreach (PowerPoint.Shape shape in shaperange)
            {
                Traverse(shape);
            }
        }

        /// <summary>
        /// Traverses the text of a single shape: its own text frame (when
        /// <c>HasTextFrame</c> is <c>msoTrue</c>), the text frame of every table cell
        /// (when <c>HasTable</c> is <c>msoTrue</c>), and, recursively, every child
        /// shape (when the shape type is <c>msoGroup</c>).
        /// </summary>
        /// <param name="shape">Shape to traverse; must not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="shape"/> is null.</exception>
        public void Traverse(PowerPoint.Shape shape)
        {
            if (shape == null)
            {
                throw new System.ArgumentNullException("shape");
            }

            if (shape.HasTextFrame == Office.MsoTriState.msoTrue)
            {
                Traverse(shape.TextFrame2);
            }

            if (shape.HasTable == Office.MsoTriState.msoTrue)
            {
                foreach (PowerPoint.Row row in shape.Table.Rows)
                {
                    foreach (PowerPoint.Cell cell in row.Cells)
                    {
                        if (cell.Shape.HasTextFrame == Office.MsoTriState.msoTrue)
                        {
                            Traverse(cell.Shape.TextFrame2);
                        }
                    }
                }
            }

            if (shape.Type == Office.MsoShapeType.msoGroup)
            {
                foreach (PowerPoint.Shape child in shape.GroupItems)
                {
                    Traverse(child);
                }
            }
        }

        // Traverses the text range of a text frame, skipping frames that report no text.
        private void Traverse(PowerPoint.TextFrame2 textframe2)
        {
            if (textframe2.HasText == Office.MsoTriState.msoTrue)
            {
                Traverse(textframe2.TextRange);
            }
        }

        // Splits the range into units with the configured accessor and hands each unit
        // to the callback, in order, until the visited units' lengths add up to the
        // length of the whole range.
        private void Traverse(Office.TextRange2 textrange2)
        {
            System.Diagnostics.Debug.Assert(procedure_ != null);
            System.Diagnostics.Debug.Assert(decompose_ != null);

            // NOTE(review): termination relies on the unit lengths summing to at least
            // textrange2.Length; behaviour of the accessors past the last unit is not
            // visible from this file - confirm against the Office documentation.
            int length = 0;
            int idx = 1; // first unit is requested with start index 1
            while (length < textrange2.Length)
            {
                // idx selects the unit; -1 is passed through as the accessor's length argument.
                Office.TextRange2 unit = decompose_(textrange2, idx, -1);
                procedure_(unit);
                length += unit.Length;
                ++idx;
            }
        }

        // The helpers below adapt each TextRange2 accessor to the Decompose signature.
        // They use no instance state, hence static.

        private static Office.TextRange2 GetCharacters_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Characters(start, length);
        }

        private static Office.TextRange2 GetRuns_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Runs(start, length);
        }

        private static Office.TextRange2 GetWords_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Words(start, length);
        }

        private static Office.TextRange2 GetLines_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Lines(start, length);
        }

        private static Office.TextRange2 GetSentences_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Sentences(start, length);
        }

        private static Office.TextRange2 GetParagraphs_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Paragraphs(start, length);
        }
    }
}
以下はRun単位で走査し、奇数番目に走査したテキストに二重取り消し線を設定する例です。
using PowerPoint = Microsoft.Office.Interop.PowerPoint;
using Office = Microsoft.Office.Core;

/// <summary>
/// Sample: walks <paramref name="presentation"/> run by run and applies a double
/// strikethrough to every other visited run, starting with the first one.
/// </summary>
/// <param name="presentation">Presentation whose text is modified.</param>
public void Test(PowerPoint.Presentation presentation)
{
    // Flipped after every visited unit; starts true so the 1st, 3rd, 5th... units are struck.
    bool strikeThisUnit = true;

    TorasenLib.TextTraverser.ForEachRun onUnit = delegate(Office.TextRange2 unit)
    {
        if (strikeThisUnit)
        {
            unit.Font.DoubleStrikeThrough = Office.MsoTriState.msoTrue;
        }

        strikeThisUnit = !strikeThisUnit;
    };

    TorasenLib.TextTraverser runTraverser =
        new TorasenLib.TextTraverser(onUnit, TorasenLib.TextTraverser.TraverseUnit.Run);
    runTraverser.Traverse(presentation);
}
それぞれの走査単位での実行結果。
Characterは1文字ごと、Runはフォント情報が等しいブロックごと、Wordは単語ごと、Lineは行ごと、Sentenceは文ごと、Paragraphは段落ごとに走査するようです。
ただ、Sentenceの区切り方法がどうなっているのかは知りません。SentenceとParagraphの実行結果が同じになっています。