テキストを走査する

備忘録も兼ねてPowerPointプレゼンテーション内のテキストを走査するためのクラスを作成しました。
走査する単位はCharacter, Run, Word, Line, Sentence, Paragraphから選択できます。

using PowerPoint = Microsoft.Office.Interop.PowerPoint;
using Office = Microsoft.Office.Core;

namespace TorasenLib
{
    /// <summary>
    /// Walks the text contained in PowerPoint objects (presentations, slides,
    /// shapes, table cells, grouped shapes) and invokes a caller-supplied
    /// callback once for every text unit of the granularity chosen at
    /// construction time.
    /// </summary>
    public class TextTraverser
    {
        /// <summary>
        /// Granularity used to split a text range before the callback is invoked.
        /// Each member maps to the TextRange2 accessor of the same name.
        /// </summary>
        public enum TraverseUnit
        {
            Character,
            Run,
            Word,
            Line,
            // NOTE: "Sensence" is a misspelling of "Sentence". It is part of the
            // public interface, so the spelling is kept for existing callers.
            Sensence,
            Paragraph
        };

        /// <summary>
        /// Callback invoked for every text unit found during a traversal.
        /// Despite its name it is used for every <see cref="TraverseUnit"/>, not only runs.
        /// </summary>
        /// <param name="unit">The text unit currently being visited.</param>
        public delegate void ForEachRun(Office.TextRange2 unit);

        // Returns the sub-range of `textrange` selected by `start` / `length`
        // for one particular TraverseUnit.
        private delegate Office.TextRange2 Decompose(Office.TextRange2 textrange, int start, int length);

        private readonly ForEachRun procedure_;
        private readonly Decompose decompose_;

        /// <summary>
        /// Creates a traverser that calls <paramref name="procedure"/> for every
        /// <paramref name="unit"/> of text it encounters.
        /// </summary>
        /// <param name="procedure">Callback to invoke per text unit.</param>
        /// <param name="unit">Granularity of the traversal.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="procedure"/> is null.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="unit"/> is not a defined <see cref="TraverseUnit"/> value.
        /// </exception>
        public TextTraverser(ForEachRun procedure, TraverseUnit unit)
        {
            if (procedure == null) {
                throw new System.ArgumentNullException("procedure");
            }
            procedure_ = procedure;
            switch (unit) {
                case TraverseUnit.Character:
                    decompose_ = new Decompose(GetCharacters_);
                    break;
                case TraverseUnit.Run:
                    decompose_ = new Decompose(GetRuns_);
                    break;
                case TraverseUnit.Word:
                    decompose_ = new Decompose(GetWords_);
                    break;
                case TraverseUnit.Line:
                    decompose_ = new Decompose(GetLines_);
                    break;
                case TraverseUnit.Sensence:
                    decompose_ = new Decompose(GetSentences_);
                    break;
                case TraverseUnit.Paragraph:
                    decompose_ = new Decompose(GetParagraphs_);
                    break;
                default:
                    // Fail at construction instead of leaving decompose_ null:
                    // a Debug.Assert alone is compiled out of release builds and
                    // the error would only surface later as a NullReferenceException.
                    throw new System.ArgumentOutOfRangeException("unit");
            }
        }

        /// <summary>Traverses every presentation in the collection.</summary>
        /// <param name="presentations">Presentations to visit.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="presentations"/> is null.</exception>
        public void Traverse(PowerPoint.Presentations presentations)
        {
            RequireNotNull_(presentations, "presentations");
            foreach (PowerPoint.Presentation presentation in presentations) {
                Traverse(presentation);
            }
        }

        /// <summary>Traverses all slides of a presentation.</summary>
        /// <param name="presentation">Presentation to visit.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="presentation"/> is null.</exception>
        public void Traverse(PowerPoint.Presentation presentation)
        {
            RequireNotNull_(presentation, "presentation");
            Traverse(presentation.Slides);
        }

        /// <summary>Traverses every slide in the collection.</summary>
        /// <param name="slides">Slides to visit.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="slides"/> is null.</exception>
        public void Traverse(PowerPoint.Slides slides)
        {
            RequireNotNull_(slides, "slides");
            foreach (PowerPoint.Slide slide in slides) {
                Traverse(slide);
            }
        }

        /// <summary>Traverses every slide in the slide range.</summary>
        /// <param name="sliderange">Slide range to visit.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="sliderange"/> is null.</exception>
        public void Traverse(PowerPoint.SlideRange sliderange)
        {
            RequireNotNull_(sliderange, "sliderange");
            foreach (PowerPoint.Slide slide in sliderange) {
                Traverse(slide);
            }
        }

        /// <summary>Traverses all shapes of a slide.</summary>
        /// <param name="slide">Slide to visit.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="slide"/> is null.</exception>
        public void Traverse(PowerPoint.Slide slide)
        {
            RequireNotNull_(slide, "slide");
            Traverse(slide.Shapes);
        }

        /// <summary>Traverses every shape in the collection.</summary>
        /// <param name="shapes">Shapes to visit.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="shapes"/> is null.</exception>
        public void Traverse(PowerPoint.Shapes shapes)
        {
            RequireNotNull_(shapes, "shapes");
            foreach (PowerPoint.Shape shape in shapes) {
                Traverse(shape);
            }
        }

        /// <summary>Traverses every shape in the shape range.</summary>
        /// <param name="shaperange">Shape range to visit.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="shaperange"/> is null.</exception>
        public void Traverse(PowerPoint.ShapeRange shaperange)
        {
            RequireNotNull_(shaperange, "shaperange");
            foreach (PowerPoint.Shape shape in shaperange) {
                Traverse(shape);
            }
        }

        /// <summary>
        /// Traverses the text of a single shape: its own text frame, the text
        /// frame of each table cell when the shape has a table, and, for a
        /// group shape, every grouped child (recursively).
        /// </summary>
        /// <param name="shape">Shape to visit.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="shape"/> is null.</exception>
        public void Traverse(PowerPoint.Shape shape)
        {
            RequireNotNull_(shape, "shape");

            // The three checks are independent; each one that applies is processed.
            if (shape.HasTextFrame == Office.MsoTriState.msoTrue) {
                Traverse(shape.TextFrame2);
            }
            if (shape.HasTable == Office.MsoTriState.msoTrue) {
                foreach (PowerPoint.Row row in shape.Table.Rows) {
                    foreach (PowerPoint.Cell cell in row.Cells) {
                        if (cell.Shape.HasTextFrame == Office.MsoTriState.msoTrue) {
                            Traverse(cell.Shape.TextFrame2);
                        }
                    }
                }
            }

            if (shape.Type == Office.MsoShapeType.msoGroup) {
                foreach (PowerPoint.Shape child in shape.GroupItems) {
                    Traverse(child);
                }
            }
        }

        // Traverses the text range of a text frame, skipping frames that report no text.
        private void Traverse(PowerPoint.TextFrame2 textframe2)
        {
            if (textframe2.HasText == Office.MsoTriState.msoTrue) {
                Traverse(textframe2.TextRange);
            }
        }

        // Splits the range into units via decompose_ and hands each one to the
        // callback. Units are requested by 1-based index until the accumulated
        // unit lengths cover the whole range.
        private void Traverse(Office.TextRange2 textrange2)
        {
            System.Diagnostics.Debug.Assert(procedure_ != null);
            System.Diagnostics.Debug.Assert(decompose_ != null);
            int length = 0;
            int idx = 1;
            // textrange2.Length is deliberately re-read on every pass because the
            // callback is free to modify the text.
            while (length < textrange2.Length) {
                // -1 is passed as the length argument; presumably this selects the
                // interop default of a single unit - confirm against the TextRange2 docs.
                Office.TextRange2 unit = decompose_(textrange2, idx, -1);
                if (unit.Length <= 0) {
                    // An empty unit cannot advance `length`, so continuing could
                    // never satisfy the loop condition; stop instead of spinning.
                    // Checked before the callback so that a callback which empties
                    // the unit does not end the traversal early.
                    break;
                }
                procedure_(unit);
                length += unit.Length;
                ++idx;
            }
        }

        // Throws ArgumentNullException naming the offending parameter when `value` is null.
        private static void RequireNotNull_(object value, string parameterName)
        {
            if (value == null) {
                throw new System.ArgumentNullException(parameterName);
            }
        }

        // Decompose implementation for TraverseUnit.Character.
        private Office.TextRange2 GetCharacters_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Characters(start, length);
        }

        // Decompose implementation for TraverseUnit.Run.
        private Office.TextRange2 GetRuns_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Runs(start, length);
        }

        // Decompose implementation for TraverseUnit.Word.
        private Office.TextRange2 GetWords_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Words(start, length);
        }

        // Decompose implementation for TraverseUnit.Line.
        private Office.TextRange2 GetLines_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Lines(start, length);
        }

        // Decompose implementation for TraverseUnit.Sensence (sentences).
        private Office.TextRange2 GetSentences_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Sentences(start, length);
        }

        // Decompose implementation for TraverseUnit.Paragraph.
        private Office.TextRange2 GetParagraphs_(Office.TextRange2 textrange, int start, int length)
        {
            return textrange.get_Paragraphs(start, length);
        }
    }
}

TextTraverser.cs 直

以下はRun単位で走査し、奇数番目に走査したテキストに二重取り消し線を設定する例です。

using PowerPoint = Microsoft.Office.Interop.PowerPoint;
using Office = Microsoft.Office.Core;

/// <summary>
/// Example: visits the presentation run by run and applies a double
/// strikethrough to the 1st, 3rd, 5th, ... unit visited.
/// </summary>
/// <param name="presentation">Presentation whose text is modified.</param>
public void Test(PowerPoint.Presentation presentation)
{
    // Flipped after every visited unit; starts true so the first unit is struck through.
    bool strikeThisUnit = true;

    TorasenLib.TextTraverser.ForEachRun strikeEveryOtherUnit = delegate(Office.TextRange2 unit)
    {
        if (strikeThisUnit) {
            unit.Font.DoubleStrikeThrough = Office.MsoTriState.msoTrue;
        }
        strikeThisUnit ^= true;
    };

    new TorasenLib.TextTraverser(strikeEveryOtherUnit, TorasenLib.TextTraverser.TraverseUnit.Run)
        .Traverse(presentation);
}

それぞれの走査単位での実行結果。
Characterは1文字ごと、Runはフォント情報が等しいブロックごと、Wordは単語ごと、Lineは行ごと、Sentenceは文ごと、Paragraphは段落ごとに走査するようです。
ただ、Sentenceの区切り方法がどうなっているのかは知りません。SentenceとParagraphの実行結果が同じになっています。