Intro
This time, I will try getting text styles and fonts.
Getting specified styles and fonts
First, I will try getting the text styles and fonts that I specified myself.
DocFileReader.cs
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace OfficeFileAccessor.OfficeFiles.Readers;

/// <summary>
/// Reads a Word document and logs, for every non-empty paragraph, its text and
/// the font, color, bold flag and size that are set directly on each run.
/// </summary>
public class DocFileReader : IOfficeFileReader
{
    private readonly NLog.Logger logger;

    public DocFileReader()
    {
        this.logger = NLog.LogManager.GetCurrentClassLogger();
    }

    /// <summary>
    /// Opens the uploaded file read-only and logs the text and run-level
    /// formatting of each paragraph directly under the document body.
    /// </summary>
    /// <param name="file">The uploaded Word document.</param>
    public void Read(IFormFile file)
    {
        // Own the upload stream explicitly so it is released when reading ends,
        // instead of relying on the document object to close it.
        using var stream = file.OpenReadStream();
        using WordprocessingDocument wordDoc = WordprocessingDocument.Open(stream, false);
        Body? body = wordDoc.MainDocumentPart?.Document?.Body;
        if (body == null)
        {
            logger.Warn("Failed reading the document");
            return;
        }
        foreach (OpenXmlElement elm in body.Elements())
        {
            if (elm is Table table)
            {
                ...
            }
            else if (elm is Paragraph paragraph)
            {
                // Skip paragraphs that contain only whitespace.
                if (string.IsNullOrWhiteSpace(paragraph.InnerText))
                {
                    continue;
                }
                // Get full text from paragraph.InnerText
                logger.Info($"Paragraph Text: {paragraph.InnerText}");
                PrintFontInfoFromParagraph(wordDoc.MainDocumentPart, paragraph);
            }
        }
    }

    /// <summary>
    /// Logs the text of every run in the paragraph together with the formatting
    /// found in that run's own RunProperties.
    /// </summary>
    /// <param name="mainPart">Main part of the document; passed through to <see cref="GetFontName"/>.</param>
    /// <param name="paragraph">The paragraph whose runs are inspected.</param>
    private void PrintFontInfoFromParagraph(MainDocumentPart? mainPart, Paragraph paragraph)
    {
        // One paragraph is separated as multiple Run elements by styles and fonts
        foreach (Run run in paragraph.Elements<Run>())
        {
            logger.Info($"Run Text: {run.InnerText}");

            // Get text style and font from RunProperties.
            RunProperties? runProperties = run.RunProperties;
            if (runProperties != null)
            {
                logger.Info("RunProperties found:");

                RunFonts? fonts = runProperties.RunFonts;
                if (fonts != null)
                {
                    logger.Info($"Font Name: {GetFontName(fonts, mainPart)}");
                }
                if (runProperties.Color != null)
                {
                    logger.Info($"Color: {runProperties.Color.Val}");
                }
                if (runProperties.Bold != null)
                {
                    // A bold element without a val attribute means "on", so a
                    // missing Val is reported as True instead of an empty string.
                    logger.Info($"Bold: {runProperties.Bold.Val?.Value ?? true}");
                }
                if (runProperties.FontSize == null)
                {
                    logger.Info("FontSize was null");
                }
                else if (int.TryParse(runProperties.FontSize.Val?.Value, out int halfPoints))
                {
                    // runProperties.FontSize.Val represents half-points.
                    // Divide as floating point so sizes such as 10.5pt are not truncated.
                    logger.Info($"FontSize: {halfPoints / 2.0}");
                }
            }
            logger.Info("------------");
        }
    }

    /// <summary>
    /// Returns the first non-empty font name among the Ascii, HighAnsi,
    /// EastAsia and ComplexScript slots, in that order.
    /// </summary>
    /// <param name="runFonts">The run's font settings; may be null.</param>
    /// <param name="mainPart">Currently unused; kept so existing callers keep compiling.</param>
    /// <returns>The font name, or "No font set" when no slot carries a name.</returns>
    private string GetFontName(RunFonts? runFonts, MainDocumentPart? mainPart)
    {
        string?[] candidates =
        {
            runFonts?.Ascii?.Value,
            runFonts?.HighAnsi?.Value,
            runFonts?.EastAsia?.Value,
            runFonts?.ComplexScript?.Value,
        };
        foreach (string? candidate in candidates)
        {
            // An attribute that is present but empty must not hide a later slot.
            if (!string.IsNullOrEmpty(candidate))
            {
                return candidate;
            }
        }
        return "No font set";
    }
}
Result
Paragraph Text: カスタムfontに設定した場合 Run Text: カス RunProperties found: Font Name: No font set FontSize was null ------------ Run Text: タムfo RunProperties found: Font Name: Noto Sans JP Black FontSize was null ------------ Run Text: ntに RunProperties found: Font Name: No font set FontSize was null ------------ Run Text: 設定 RunProperties found: Font Name: No font set Bold: FontSize was null ------------ Run Text: した RunProperties found: Font Name: Meiryo UI FontSize: 16 ------------ Run Text: 場合 RunProperties found: Font Name: No font set Color: 60CAF3 FontSize: 22 ------------
Getting default styles and fonts
Unless I change the font, color, size, etc. myself, the above code won't get that information.
So I have to get them from the base style or ThemeFonts.
I can get the style information of "見出し(Headline)".
But some styles like "標準(Normal)" don't have any style information, so I set the default values if I can't get "ParagraphStyleId" from the paragraph.
DocFileReader.cs
...
/// <summary>
/// Reads a Word document and logs the text of every paragraph and run together
/// with its fonts, color, bold flag and size. Values missing on a run are taken
/// from the paragraph style, its base styles, or the theme's minor fonts.
/// </summary>
public class DocFileReader : IOfficeFileReader
{
    private readonly NLog.Logger logger;

    /// <summary>Which font slot a font name was taken from.</summary>
    private enum FontType
    {
        Ascii = 0,
        HighAnsi,
        EastAsia,
        Latin,
    }

    private enum FontPriority
    {
        Major = 0,
        Minor
    }

    /// <summary>Major/minor East Asian and Latin typefaces read from the theme part.</summary>
    private record ThemeFont(string? EastAsiaMajorFont, string? EastAsiaMinorFont, string? LatinMajorFont, string? LatinMinorFont);

    /// <summary>A font name paired with the slot it was found in.</summary>
    private record TextFont(FontType FontType, string FontName);

    /// <summary>Text formatting resolved for a paragraph; the initializers are the fallback values.</summary>
    private class TextProps
    {
        public List<TextFont> Fonts { get; set; } = [];

        // Size in points. Stored as double because the document stores
        // half-points, so values such as 10.5 are possible.
        public double FontSize { get; set; } = 11;
        public bool Bold { get; set; } = false;
        public string Color { get; set; } = "000000";
    }

    ...

    /// <summary>
    /// Opens the uploaded file read-only and logs the text and formatting of
    /// each paragraph directly under the document body.
    /// </summary>
    /// <param name="file">The uploaded Word document.</param>
    public void Read(IFormFile file)
    {
        // Own the upload stream explicitly so it is released when reading ends,
        // instead of relying on the document object to close it.
        using var stream = file.OpenReadStream();
        using WordprocessingDocument wordDoc = WordprocessingDocument.Open(stream, false);
        Body? body = wordDoc.MainDocumentPart?.Document?.Body;
        ...
        ThemeFont themeFont = GetThemeFont(wordDoc.MainDocumentPart);
        foreach (OpenXmlElement elm in body.Elements())
        {
            if (elm is Table table)
            {
                ...
            }
            else if (elm is Paragraph paragraph)
            {
                // Get full text from paragraph.InnerText
                logger.Info($"Paragraph Text: {paragraph.InnerText}");
                PrintFontInfoFromParagraph(wordDoc.MainDocumentPart, paragraph, themeFont);
            }
        }
    }

    /// <summary>
    /// Get fonts from Theme
    /// </summary>
    /// <param name="mainPart">Main part of the document; may be null.</param>
    /// <returns>
    /// The major/minor East Asian and Latin typefaces. Every member is null
    /// when the theme part, its elements or its font scheme is missing.
    /// </returns>
    private ThemeFont GetThemeFont(MainDocumentPart? mainPart)
    {
        var fontScheme = mainPart?.ThemePart?.Theme?.ThemeElements?.FontScheme;
        var majorFonts = fontScheme?.MajorFont;
        var minorFonts = fontScheme?.MinorFont;

        return new ThemeFont(
            EastAsiaMajorFont: majorFonts?.EastAsianFont?.Typeface?.Value,
            EastAsiaMinorFont: minorFonts?.EastAsianFont?.Typeface?.Value,
            LatinMajorFont: majorFonts?.LatinFont?.Typeface?.Value,
            LatinMinorFont: minorFonts?.LatinFont?.Typeface?.Value);
    }

    /// <summary>
    /// Logs every run of the paragraph. A value set on the run itself wins;
    /// otherwise the value resolved for the paragraph is logged.
    /// </summary>
    /// <param name="mainPart">Main part of the document, used to look up styles.</param>
    /// <param name="paragraph">The paragraph whose runs are inspected.</param>
    /// <param name="themeFont">Theme fonts used when a style names no concrete font.</param>
    private void PrintFontInfoFromParagraph(MainDocumentPart? mainPart, Paragraph paragraph, ThemeFont themeFont)
    {
        TextProps? props = GetTextProps(mainPart, paragraph, themeFont);

        // One paragraph is separated as multiple Run elements by styles and font types
        foreach (Run run in paragraph.Elements<Run>())
        {
            logger.Info($"Run Text: {run.InnerText}");

            RunProperties? runProperties = run.RunProperties;
            if (runProperties == null)
            {
                logger.Info("runProperties was null");
            }
            else
            {
                // Fonts: run-level fonts first, paragraph-level fonts otherwise.
                List<TextFont> fonts = GetFonts(runProperties.RunFonts);
                if (fonts.Count == 0 && props?.Fonts != null)
                {
                    fonts = props.Fonts;
                }
                foreach (TextFont font in fonts)
                {
                    logger.Info($"Font Name: {font.FontName} Type: {font.FontType}");
                }

                if (runProperties.Color != null)
                {
                    logger.Info($"Color: {runProperties.Color.Val}");
                }
                else if (props?.Color != null)
                {
                    logger.Info($"Color: {props.Color}");
                }

                if (runProperties.Bold != null)
                {
                    // A bold element without a val attribute means "on", so a
                    // missing Val is reported as True instead of an empty string.
                    logger.Info($"Bold: {runProperties.Bold.Val?.Value ?? true}");
                }
                else if (props != null)
                {
                    logger.Info($"Bold: {props.Bold}");
                }

                if (runProperties.FontSize == null)
                {
                    if (props != null)
                    {
                        logger.Info($"FontSize: {props.FontSize}");
                    }
                }
                else if (int.TryParse(runProperties.FontSize.Val?.Value, out int halfPoints))
                {
                    // runProperties.FontSize.Val represents half-points.
                    // Divide as floating point so sizes such as 10.5pt are not truncated.
                    logger.Info($"FontSize: {halfPoints / 2.0}");
                }
            }
            logger.Info("------------");
        }
    }

    /// <summary>
    /// Get style and font from paragraph
    /// </summary>
    /// <param name="mainPart">Main part of the document, used to look up styles.</param>
    /// <param name="paragraph">The paragraph whose style is resolved.</param>
    /// <param name="themeFont">Theme fonts used as fallback.</param>
    /// <returns>
    /// The properties of the paragraph's style when they name at least one font,
    /// otherwise those of the nearest base style that has run properties,
    /// otherwise the defaults built from the theme's minor fonts.
    /// </returns>
    private TextProps? GetTextProps(MainDocumentPart? mainPart, Paragraph paragraph, ThemeFont themeFont)
    {
        string? styleId = paragraph.ParagraphProperties?.ParagraphStyleId?.Val?.Value;
        Style? style = GetStyleById(mainPart, styleId);
        if (style == null)
        {
            // No style id on the paragraph, or the id is not defined in the styles part.
            return GenerateDefaultProps(themeFont);
        }

        TextProps? ownProps = GetTextPropsFromRunProperties(style.StyleRunProperties, themeFont);
        if (ownProps is { Fonts.Count: > 0 })
        {
            return ownProps;
        }

        // NOTE(review): from here on the base style's properties replace the
        // style's own ones entirely, so a size/color/bold set on the style
        // itself is not reported when that style names no font.
        StyleRunProperties? inheritedRunProperties = GetInheritedRunProperties(style, mainPart);
        if (inheritedRunProperties == null)
        {
            return GenerateDefaultProps(themeFont);
        }
        logger.Info("Inherited from Base Style:");
        return GetTextPropsFromRunProperties(inheritedRunProperties, themeFont);
    }

    /// <summary>
    /// Walks the BasedOn chain of a style and returns the run properties of the
    /// first base style that has any.
    /// </summary>
    /// <param name="style">The style whose base styles are searched.</param>
    /// <param name="mainPart">Main part of the document, used to look up styles.</param>
    /// <returns>The inherited run properties, or null when no base style provides them.</returns>
    private static StyleRunProperties? GetInheritedRunProperties(Style style, MainDocumentPart? mainPart)
    {
        // Remember visited ids so a malformed document whose styles are based
        // on each other cannot make this loop forever.
        HashSet<string> visitedStyleIds = new();
        Style? current = style;
        while (current?.BasedOn?.Val?.Value is { } baseStyleId && visitedStyleIds.Add(baseStyleId))
        {
            current = GetStyleById(mainPart, baseStyleId);
            if (current?.StyleRunProperties is { } baseRunProperties)
            {
                return baseRunProperties;
            }
        }
        return null;
    }

    /// <summary>
    /// Collects the non-empty Ascii, HighAnsi and EastAsia font names of a run.
    /// </summary>
    /// <param name="runFonts">The font settings; may be null.</param>
    /// <returns>The fonts found, in Ascii, HighAnsi, EastAsia order; empty when none is set.</returns>
    private static List<TextFont> GetFonts(RunFonts? runFonts)
    {
        List<TextFont> results = [];
        AddIfSet(FontType.Ascii, runFonts?.Ascii?.Value);
        AddIfSet(FontType.HighAnsi, runFonts?.HighAnsi?.Value);
        AddIfSet(FontType.EastAsia, runFonts?.EastAsia?.Value);
        return results;

        void AddIfSet(FontType fontType, string? fontName)
        {
            if (!string.IsNullOrEmpty(fontName))
            {
                results.Add(new TextFont(fontType, fontName));
            }
        }
    }

    /// <summary>
    /// Builds the fallback properties used when no style can be resolved: the
    /// theme's minor fonts plus the default size, bold flag and color of TextProps.
    /// </summary>
    /// <param name="themeFont">Theme fonts of the document.</param>
    /// <returns>A new TextProps instance carrying the default values.</returns>
    private static TextProps GenerateDefaultProps(ThemeFont themeFont)
    {
        // If the style cannot be gotten, return the default font information.
        return new()
        {
            Fonts = BuildThemeFonts(themeFont.LatinMinorFont, themeFont.EastAsiaMinorFont),
        };
    }

    /// <summary>
    /// Finds a style definition by its id.
    /// </summary>
    /// <param name="mainPart">Main part of the document; may be null.</param>
    /// <param name="styleId">Id to look for; may be null or empty.</param>
    /// <returns>The matching style, or null when the id is empty or no such style exists.</returns>
    private static Style? GetStyleById(MainDocumentPart? mainPart, string? styleId)
    {
        if (string.IsNullOrEmpty(styleId))
        {
            return null;
        }
        return mainPart?.StyleDefinitionsPart?.Styles?.Elements<Style>()
            .FirstOrDefault(s => s.StyleId?.Value == styleId);
    }

    /// <summary>
    /// Converts the run properties of a style into TextProps. Values the style
    /// does not set keep the TextProps defaults.
    /// </summary>
    /// <param name="runProperties">Run properties of a style; may be null.</param>
    /// <param name="themeFont">Theme fonts used when the style names no concrete font.</param>
    /// <returns>The converted properties, or null when <paramref name="runProperties"/> is null.</returns>
    private TextProps? GetTextPropsFromRunProperties(StyleRunProperties? runProperties, ThemeFont themeFont)
    {
        if (runProperties == null)
        {
            return null;
        }

        TextProps result = new();

        RunFonts? runFonts = runProperties.RunFonts;
        if (runFonts != null)
        {
            // Prefer explicit font names; fall back to the theme fonts otherwise.
            result.Fonts = GetTextFonts(runFonts);
            if (result.Fonts.Count <= 0)
            {
                result.Fonts = GetTextFonts(themeFont, runFonts);
            }
        }
        if (runProperties.Color?.Val?.Value is { } color)
        {
            result.Color = color;
        }
        if (runProperties.Bold != null)
        {
            // A missing val attribute means "on"; an explicit false value
            // switches bold off and must not be reported as bold.
            result.Bold = runProperties.Bold.Val?.Value ?? true;
        }
        // runProperties.FontSize.Val represents half-points
        if (int.TryParse(runProperties.FontSize?.Val?.Value, out int halfPoints))
        {
            result.FontSize = halfPoints / 2.0;
        }
        return result;
    }

    /// <summary>
    /// Get font name from RunFonts
    /// </summary>
    /// <param name="runFonts">The font settings of a style.</param>
    /// <returns>The explicitly named fonts; empty when none is set.</returns>
    private static List<TextFont> GetTextFonts(RunFonts runFonts)
    {
        // Same rules as for run-level fonts, so both paths treat an empty
        // attribute value as "not set".
        return GetFonts(runFonts);
    }

    /// <summary>
    /// Get font name from ThemeFonts
    /// </summary>
    /// <param name="themeFont">Theme fonts of the document.</param>
    /// <param name="runFonts">The font settings of a style; only EastAsiaTheme is inspected.</param>
    /// <returns>The major fonts when EastAsiaTheme is MajorEastAsia, otherwise the minor fonts.</returns>
    private static List<TextFont> GetTextFonts(ThemeFont themeFont, RunFonts runFonts)
    {
        // ThemeFont is divided into MajorFont and MinorFont.
        if (runFonts.EastAsiaTheme?.Value == ThemeFontValues.MajorEastAsia)
        {
            return BuildThemeFonts(themeFont.LatinMajorFont, themeFont.EastAsiaMajorFont);
        }
        return BuildThemeFonts(themeFont.LatinMinorFont, themeFont.EastAsiaMinorFont);
    }

    /// <summary>
    /// Builds a font list from a Latin and an East Asian theme typeface,
    /// skipping names that are null or empty.
    /// </summary>
    private static List<TextFont> BuildThemeFonts(string? latinFont, string? eastAsiaFont)
    {
        List<TextFont> results = [];
        if (!string.IsNullOrEmpty(latinFont))
        {
            results.Add(new(FontType.Latin, latinFont));
        }
        if (!string.IsNullOrEmpty(eastAsiaFont))
        {
            results.Add(new(FontType.EastAsia, eastAsiaFont));
        }
        return results;
    }
}
Result
Found a Paragraph with text: This is みだし1 Paragraph Text: This is みだし1 Run Text: This is みだし1 Font Name: 游ゴシック Light Type: Latin Color: 000000 Bold: False FontSize: 16 ------------ Found a Paragraph with text: あいう Paragraph Text: あいう Run Text: あいう Font Name: 游明朝 Type: Latin Color: 000000 Bold: False FontSize: 11 ------------ Found a Paragraph with text: 見出し2 Paragraph Text: 見出し2 Run Text: 見出し2 Font Name: 游ゴシック Light Type: Latin Color: 000000 Bold: False FontSize: 14 ------------ Found a Paragraph with text: えおか Paragraph Text: えおか Run Text: えおか Font Name: 游明朝 Type: Latin Color: 000000 Bold: False FontSize: 11 ------------ Found a Paragraph with text: きくけ Paragraph Text: きくけ Run Text: きくけ Font Name: 游明朝 Type: Latin Color: 000000 Bold: False FontSize: 11 ------------ Found a Paragraph with text: こさし Paragraph Text: こさし Run Text: こさし Font Name: 游明朝 Type: Latin Color: 000000 Bold: False FontSize: 11 ------------ ...
Top comments (0)