Python操作Word详解:使用python-docx库实现功能全解析
目录
1 概述
官方文档:https://python-docx.readthedocs.io
2 快速入门
from docx import Document
from docx.shared import Inches

# Quick-start script: build a document containing headings, formatted runs,
# styled paragraphs, a picture, a table and a page break, then save it.

# Create a new Word document.
document = Document()

# Add a heading at level 0 (heading levels range from 0 to 9).
document.add_heading('Document Title', 0)

# Add a paragraph, then append runs that carry their own character formatting.
p = document.add_paragraph('A plain paragraph having some ')
p.add_run('bold').bold = True  # append "bold" and make it bold
p.add_run(' and some ')  # append " and some " unformatted
p.add_run('italic.').italic = True  # append "italic." and make it italic

# Add a heading; the level defaults to 1 when omitted.
document.add_heading('Heading, level 1')

# Add paragraphs using the quote, bulleted-list and numbered-list styles.
document.add_paragraph('Intense quote', style='Intense Quote')
document.add_paragraph('first item in unordered list', style='List Bullet')
document.add_paragraph('first item in ordered list', style='List Number')

# Add a picture sized 1.25 x 1.65 inches; 'CSDN.png' must exist in the
# working directory.
document.add_picture('CSDN.png', width=Inches(1.25), height=Inches(1.65))

# Table content: a header row followed by one row per student.
data = [
    ['姓名', '语文', '数学', '英语'],
    ['张三', 82, 96, 88],
    ['李四', 92, 86, 66],
    ['王五', 62, 98, 80],
]

# Size the table from the data so the two cannot drift apart, then fill each
# cell. Cell text must be a string, hence str().
table = document.add_table(rows=len(data), cols=len(data[0]))
for table_row, values in zip(table.rows, data):
    for cell, value in zip(table_row.cells, values):
        cell.text = str(value)

# Add a page break.
document.add_page_break()

# Save the document.
document.save('test.docx')
3 深入理解文本对象(object),并设置属性(properties)
3.1 块级对象(block-level object)
3.2 内联级对象(inline-level object)
3.3 块级对象属性(block-level object properties)
水平对齐(horizontal alignment)
WD_ALIGN_PARAGRAPH 存储了多种对齐格式,例如:LEFT(左对齐)、CENTER(居中对齐)、RIGHT(右对齐)。 更多参见:https://python-docx.readthedocs.io/en/latest/api/enum/WdAlignParagraph.html#wdparagraphalignment
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH

# Demonstrates horizontal alignment of a paragraph.

# Start from an empty document.
document = Document()

# Add a paragraph and centre it through its paragraph format.
centered = document.add_paragraph('python-docx')
centered.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Write the result to disk.
document.save('test.docx')
缩进(indentation)
from docx import Document
from docx.shared import Pt, Inches

# Demonstrates left, right and first-line indentation of a paragraph.

# Start from an empty document.
document = Document()

# A long paragraph, so that the indentation is visible over several lines.
sample_text = 'Indentation is the horizontal space between a paragraph and edge of its container, typically the page margin. A paragraph can be indented separately on the left and right side. The first line can also have a different indentation than the rest of the paragraph. A first line indented further than the rest of the paragraph has first line indent. A first line indented less has a hanging indent.'
paragraph = document.add_paragraph(sample_text)

# Indentation is set on the paragraph format.
fmt = paragraph.paragraph_format
fmt.left_indent = Pt(24)  # left indent: 24 points
fmt.right_indent = Inches(1)  # right indent: 1 inch
fmt.first_line_indent = Pt(24)  # extra indent for the first line only

# Write the result to disk.
document.save('test.docx')
制表位(tab stops)
from docx import Document
from docx.enum.text import WD_TAB_ALIGNMENT, WD_TAB_LEADER
from docx.shared import Cm

# Demonstrates adding a tab stop to a paragraph.

# Start from an empty document.
document = Document()

# Add the paragraph that will own the tab stop.
paragraph = document.add_paragraph('python-docx')

# Add a centre-aligned tab stop at 12 cm with a dotted leader.
tab_stops = paragraph.paragraph_format.tab_stops
tab_stops.add_tab_stop(
    Cm(12),
    alignment=WD_TAB_ALIGNMENT.CENTER,
    leader=WD_TAB_LEADER.DOTS,
)

# Write the result to disk.
document.save('test.docx')
段落间距(paragraph spacing)
from docx import Document
from docx.shared import Pt

# Demonstrates the space placed before and after a paragraph.

# Start from an empty document.
document = Document()

# Add a paragraph and take its paragraph format.
spaced = document.add_paragraph('python-docx')
fmt = spaced.paragraph_format

# 12 points of space before the paragraph, 24 points after it.
fmt.space_before = Pt(12)
fmt.space_after = Pt(24)

# Write the result to disk.
document.save('test.docx')
行间距(line spacing)
from docx import Document
from docx.shared import Length, Pt

# Demonstrates how line_spacing and line_spacing_rule interact. The trailing
# comment on each print() shows the expected output.

# Create a new Word document.
document = Document()

# Add a paragraph long enough to wrap over several lines.
paragraph = document.add_paragraph('Line spacing is controlled by the interaction of the line_spacing and line_spacing_rule properties. line_spacing is either a Length value, a (small-ish) float, or None. A Length value indicates an absolute distance. A float indicates a number of line heights. None indicates line spacing is inherited. line_spacing_rule is a member of the WD_LINE_SPACING enumeration or None.')

# Line spacing is set on the paragraph format.
paragraph_format = paragraph.paragraph_format

# None: line spacing is inherited (the default).
paragraph_format.line_spacing = None
print(isinstance(paragraph_format.line_spacing, Length))  # False
print(paragraph_format.line_spacing_rule)  # None

# A Length value: an absolute distance (24 points).
paragraph_format.line_spacing = Pt(24)
print(isinstance(paragraph_format.line_spacing, Length))  # True
print(paragraph_format.line_spacing_rule)  # EXACTLY (4)

# A plain number: a multiple of the line height (2 lines).
paragraph_format.line_spacing = 2
print(isinstance(paragraph_format.line_spacing, Length))  # False
# python-docx reports the special multiples 1, 1.5 and 2 as SINGLE,
# ONE_POINT_FIVE and DOUBLE; any other multiple (e.g. 1.75) is MULTIPLE (5).
print(paragraph_format.line_spacing_rule)  # DOUBLE (2)

# Save the document.
document.save('test.docx')
分页属性(pagination properties)
四个分页属性:
keep_together:将整个段落保持在同一页上;如果段落会被拆分到两页,则在段落之前插入分页符。
keep_with_next:将一个段落与下一个段落保持在同一页上。例如,这可以用于将章节标题与章节的第一段保持在同一页上。
page_break_before:将一个段落放置在新页面的顶部。这可以在章节标题上使用,以确保章节在新页面上开始。
widow_control:避免将段落的第一行或最后一行与段落的其余部分放在单独的页面上。
这些属性都有三种状态:True 表示使用属性;False 表示不使用属性;None 表示从样式层次结构继承属性值。
from docx import Document

# Demonstrates the four pagination properties on a heading and a paragraph.

# Start from an empty document.
document = Document()

# A level-0 heading (levels range from 0 to 9) followed by a body paragraph.
heading = document.add_heading('Document Title', 0)
paragraph = document.add_paragraph('python-docx')

# Heading: stay on the same page as the next paragraph and start a new page.
heading_format = heading.paragraph_format
heading_format.keep_with_next = True
heading_format.page_break_before = True

# Paragraph: keep all lines on one page and enable widow/orphan control.
body_format = paragraph.paragraph_format
body_format.keep_together = True
body_format.widow_control = True

# Write the result to disk.
document.save('test.docx')
3.4 内联级对象属性(inline-level object properties)
- MSO_THEME_COLOR 存储了多种主题颜色,例如:ACCENT_1、ACCENT_2、DARK_1、DARK_2。更多参见:https://python-docx.readthedocs.io/en/latest/api/enum/MsoThemeColorIndex.html#msothemecolorindex
- WD_UNDERLINE 存储了多种下划线样式,例如:SINGLE(单实线)、DOUBLE(双实线)、DOTTED(点线)。更多参见:https://python-docx.readthedocs.io/en/latest/api/enum/WdUnderline.html#wdunderline
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.dml import MSO_THEME_COLOR
from docx.enum.text import WD_UNDERLINE

# Demonstrates character-level (run) formatting through the Font object.

document = Document()

# Character formatting lives on runs, so add a run to an empty paragraph.
paragraph = document.add_paragraph()
run = paragraph.add_run('python-docx')

font = run.font
font.name = '微软雅黑'  # typeface
font.size = Pt(12)  # size in points

# The colour can be given as an RGB value or as a theme colour; both
# assignments are shown, applied in this order.
color = font.color
color.rgb = RGBColor(255, 0, 0)
color.theme_color = MSO_THEME_COLOR.ACCENT_1

font.bold = True  # bold
font.italic = True  # italic
font.underline = WD_UNDERLINE.DOT_DASH  # dot-dash underline

document.save('test.docx')
4 深入理解样式(styles),以及如何运用样式
4.1 内置样式
所有样式
from docx import Document

# Lists the name of every style in the default template, one per paragraph.

document = Document()
styles = document.styles

# Heading for the listing, then one paragraph per style name.
document.add_heading('所有样式', level=0)
for style in styles:
    document.add_paragraph(style.name)

document.save('test.docx')
段落样式(paragraph styles)
from docx import Document
from docx.enum.style import WD_STYLE_TYPE

# Lists every paragraph style, each name rendered in the style it names.

document = Document()
styles = document.styles

document.add_heading('段落样式', level=0)
for style in styles:
    # Only paragraph styles can be applied to a paragraph.
    if style.type == WD_STYLE_TYPE.PARAGRAPH:
        document.add_paragraph(style.name, style=style)

document.save('test.docx')
字符样式(character styles)
from docx import Document
from docx.enum.style import WD_STYLE_TYPE

# Lists every character style, each name rendered in the style it names.

document = Document()
styles = document.styles

document.add_heading('字符样式', level=0)
for style in styles:
    # Character styles apply to runs, so put each name in its own run.
    if style.type == WD_STYLE_TYPE.CHARACTER:
        document.add_paragraph().add_run(style.name, style=style)

document.save('test.docx')
表格样式(table styles)
from docx import Document
from docx.enum.style import WD_STYLE_TYPE

# Lists every table style: the style name followed by a 2x2 sample table.

document = Document()
styles = document.styles

document.add_heading('表格样式', level=0)
for style in styles:
    if style.type == WD_STYLE_TYPE.TABLE:
        # Label the sample, then add a small table drawn with that style.
        document.add_paragraph(style.name)
        document.add_table(2, 2, style=style)

document.save('test.docx')
4.2 自定义样式
自定义内置样式
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_UNDERLINE

# Customises the font of the built-in 'Body Text' style and applies it.

document = Document()
styles = document.styles

# Look up the built-in style and change its character formatting.
body_text = styles['Body Text']
body_font = body_text.font
body_font.name = '微软雅黑'
body_font.size = Pt(12)
body_font.color.rgb = RGBColor(255, 0, 0)
body_font.bold = True
body_font.italic = True
body_font.underline = WD_UNDERLINE.DOT_DASH

# Apply the customised style to a paragraph.
document.add_heading('自定义内置样式', level=0)
document.add_paragraph('style', style=body_text)

# To remove the style instead, call body_text.delete(); printing len(styles)
# before and after the call shows the effect.

document.save('test.docx')
自定义段落样式
from docx import Document
from docx.shared import Pt, Inches
from docx.enum.style import WD_STYLE_TYPE

# Defines a new paragraph style and applies it by name.

document = Document()
styles = document.styles

# Register the new style and set its paragraph formatting.
paragraph_style = styles.add_style('ParagraphStyle', WD_STYLE_TYPE.PARAGRAPH)
style_format = paragraph_style.paragraph_format
style_format.first_line_indent = Inches(2)  # first-line indent: 2 inches
style_format.space_before = Pt(12)  # 12 points of space before
style_format.line_spacing = 2  # two line heights

# Apply the style by name; passing the paragraph_style object as `style`
# is the alternative form.
document.add_heading('自定义段落样式', level=0)
document.add_paragraph('paragraph_style', style='ParagraphStyle')

# To remove the style, call paragraph_style.delete() or
# styles['ParagraphStyle'].delete(); printing len(styles) before and after
# the call shows the effect.

document.save('test.docx')
自定义字符样式
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_UNDERLINE
from docx.enum.style import WD_STYLE_TYPE

# Defines a new character style and applies it to a run.

document = Document()
styles = document.styles

# Register the new style and set its font properties.
character_style = styles.add_style('CharacterStyle', WD_STYLE_TYPE.CHARACTER)
style_font = character_style.font
style_font.name = '微软雅黑'
style_font.size = Pt(16)
style_font.color.rgb = RGBColor(255, 165, 0)
style_font.bold = True
style_font.italic = True
style_font.underline = WD_UNDERLINE.DOT_DASH

# Apply the style to a run, looking it up in the styles collection; passing
# the character_style object directly is the alternative form.
document.add_heading('自定义字符样式', level=0)
styled_paragraph = document.add_paragraph()
styled_paragraph.add_run('character_style', style=styles['CharacterStyle'])

# To remove the style, call character_style.delete(); printing len(styles)
# before and after the call shows the effect.

document.save('test.docx')
自定义表格样式
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.table import WD_ALIGN_VERTICAL
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.style import WD_STYLE_TYPE
from docx.oxml.ns import qn

# Defines a new table style, applies it to a table filled from `data`, and
# emphasises the header row and the first column.

document = Document()
styles = document.styles

# Register the new table style and set its font and paragraph formatting.
table_style = styles.add_style('TableStyle', WD_STYLE_TYPE.TABLE)
font = table_style.font
paragraph_format = table_style.paragraph_format
font.name = '微软雅黑'
# font.name does not set the East Asian font slot, which Word uses for CJK
# text, so set it explicitly; otherwise the Chinese cell text keeps the
# default CJK font.
table_style.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
font.size = Pt(18)
font.color.rgb = RGBColor(255, 255, 0)
paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Table content: a header row followed by one row per student.
data = [
    ['姓名', '语文', '数学', '英语'],
    ['张三', 82, 96, 88],
    ['李四', 92, 86, 66],
    ['王五', 62, 98, 80],
]

# Create a table sized from the data and apply the style. Passing
# style=table_style to add_table() is the alternative form.
document.add_heading('自定义表格样式', level=0)
table = document.add_table(rows=len(data), cols=len(data[0]))
table.style = table_style

# Fill the cells; cell text must be a string, hence str().
for table_row, values in zip(table.rows, data):
    for cell, value in zip(table_row.cells, values):
        cell.text = str(value)

# Emphasise the header row and the first column. The column slice skips the
# top-left cell, which the header row already covers.
header_cells = table.rows[0].cells
first_column_cells = table.columns[0].cells[1:]
for cell in (*header_cells, *first_column_cells):
    # Vertical alignment of a cell takes the table enum, not the paragraph
    # alignment enum.
    cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
    cell.paragraphs[0].runs[0].bold = True

# To remove the style, call table_style.delete(); printing len(styles)
# before and after the call shows the effect.

document.save('test.docx')
4.3 应用样式
将样式对象赋值给样式属性
# Pass a style object when creating the paragraph. Relies on `document` and
# `styles` (document.styles) already being defined.
document.add_paragraph('python', style=styles['Body Text'])
或
# Create the paragraph first, then assign a style object to its `style`
# attribute. Relies on `document` and `styles` already being defined.
paragraph = document.add_paragraph('python')
paragraph.style = styles['Body Text']
将样式名称赋值给样式属性
# Pass the style name when creating the paragraph. Relies on `document`
# already being defined.
document.add_paragraph('python', style='Body Text')
或
# Create the paragraph first, then assign the style name to its `style`
# attribute. Relies on `document` already being defined.
paragraph = document.add_paragraph('python')
paragraph.style = 'Body Text'
4.4 删除样式
# Delete the 'Body Text' style from the document's styles. Relies on `styles`
# (document.styles) already being defined.
styles['Body Text'].delete()
作者:云淡丶风轻