initial commit

Signed-off-by: Peter Siegmund <mars3142@noreply.mars3142.dev>
This commit is contained in:
2025-10-31 23:37:30 +01:00
commit bf6b52fd94
9654 changed files with 4035664 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
testdata/* -text
maint/manifest-* -text

View File

@@ -0,0 +1,49 @@
codecov:
strict_yaml_branch: default
require_ci_to_pass: false
notify:
wait_for_ci: false
notify_error: true
coverage:
range: 75..90
round: nearest
precision: 2
status:
project: false
patch:
default:
target: 100%
threshold: 5%
github_checks:
annotations: false
comment: false
# layout: "condensed_header, condensed_files, condensed_footer"
# hide_project_coverage: true
# require_head: true
# require_base: true
# require_changes: "coverage_drop OR uncovered_patch"
component_management:
individual_components:
- component_id: library
name: "Core library"
paths:
- '!src/(pcre2test|pcre2grep|pcre2_jit_test|pcre2posix_test|pcre2_printint)\.c'
statuses:
- type: project
target: auto
threshold: 0.5%
- component_id: test_binaries
name: "Test binaries"
paths:
- 'src/(pcre2test|pcre2grep|pcre2_jit_test|pcre2posix_test|pcre2_printint)\.c'
statuses:
- type: project
target: auto
threshold: 2%

View File

@@ -0,0 +1,6 @@
# Dependabot configuration: a single update rule for the `github-actions`
# package ecosystem, checked on a monthly schedule.
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: monthly

View File

@@ -0,0 +1,347 @@
name: Build
on:
workflow_dispatch:
push:
branches: [ master, "release/**" ]
pull_request:
branches: [ master ]
permissions:
contents: read
jobs:
linux:
name: Linux
runs-on: ubuntu-latest
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install zlib1g-dev libbz2-dev
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-pcre2grep-libz --enable-pcre2grep-libbz2
- name: Build
run: make -j3 CPPFLAGS='-Wall -Wextra -Werror'
- name: Test (main test script)
run: ./RunTest
- name: Test (JIT test program)
run: ./pcre2_jit_test
- name: Test (pcre2grep test script)
run: ./RunGrepTest
- name: Test (pcre2posix program)
run: ./pcre2posix_test -v
- name: Install
run: |
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
alpine:
name: alpine
runs-on: ubuntu-latest
container: alpine
steps:
- name: Setup
run: apk add --no-cache automake autoconf gcc libtool make musl-dev git zlib zlib-dev bzip2 bzip2-dev #musl-locales
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-pcre2grep-libz --enable-pcre2grep-libbz2
- name: Build
run: make -j3 CPPFLAGS='-Wall -Wextra -Werror'
- name: Test (main test script)
run: ./RunTest
- name: Test (JIT test program)
run: ./pcre2_jit_test
- name: Test (pcre2grep test script)
run: ./RunGrepTest
- name: Test (pcre2posix program)
run: ./pcre2posix_test -v
- name: Install
run: |
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
macos:
name: macOS universal
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_SUPPORT_LIBZ=ON -DPCRE2_SUPPORT_LIBBZ2=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_C_FLAGS='-Wall -Wextra' -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
- name: Build
run: cd build && make -j3
- name: Test
run: cd build && ctest -j3 --output-on-failure
- name: Install
run: |
cd build
cmake --install . --prefix install-dir
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-macos
windows:
name: Windows
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
arch: ["Win32", "x64"]
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A ${{ matrix.arch }}
- name: Build
run: cmake --build build --config Release
- name: Test
run: cd build && ctest -C Release -j3 --output-on-failure
- name: Install
run: |
cd build
cmake --install . --config Release --prefix install-dir
../maint/RunManifestTest.ps1 install-dir ../maint/manifest-cmakeinstall-windows
freebsd:
name: FreeBSD
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Build & test
uses: vmactions/freebsd-vm@debf37ca7b7fa40e19c542ef7ba30d6054a706a4 # v1.1.5
with:
usesh: true
run: |
set -e
./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32
make -j3 CPPFLAGS='-Wall -Wextra -Werror'
make check
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-freebsd
solaris:
name: Solaris
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Transfer Oracle Studio certificates
env:
PKG_ORACLE_COM_CERTIFICATE_PEM: ${{ secrets.PKG_ORACLE_COM_CERTIFICATE_PEM }}
PKG_ORACLE_COM_KEY_PEM: ${{ secrets.PKG_ORACLE_COM_KEY_PEM }}
run: |
printenv PKG_ORACLE_COM_CERTIFICATE_PEM > pkg.oracle.com.certificate.pem
printenv PKG_ORACLE_COM_KEY_PEM > pkg.oracle.com.key.pem
- name: Prepare
run: ./autogen.sh
- name: Build & test
uses: vmactions/solaris-vm@a89b9438868c70db27e41625f0a5de6ff5e90809 # v1.1.0
with:
usesh: true
# Seriously! Solaris is the only OS to actually ship without a C
# compiler, and not even to provide a simple download to get one!
# You have to actually register with Oracle to get an X.509
# certificate before you can even download their compiler. Whatever.
prepare: |
cp "$GITHUB_WORKSPACE/pkg.oracle.com.key.pem" /root/pkg.oracle.com.key.pem
cp "$GITHUB_WORKSPACE/pkg.oracle.com.certificate.pem" /root/pkg.oracle.com.certificate.pem
sudo pkg set-publisher \
-k /root/pkg.oracle.com.key.pem \
-c /root/pkg.oracle.com.certificate.pem \
-G "*" -g https://pkg.oracle.com/solarisstudio/release solarisstudio
pkg install developer/build/make system/header
pkg install --accept developerstudio-126/cc
run: |
set -e
PATH=/opt/developerstudio12.6/bin:"$PATH"
export PATH
CC=cc
export CC
./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32
make CPPFLAGS='-Wall -Wextra -Werror'
make check
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
distcheck:
name: Build & verify distribution
runs-on: ubuntu-24.04 # TODO: Update to ubuntu-latest when that switches to 24.04
permissions:
id-token: write # Needed to make calls to the Sigstore service
attestations: write # Needed to write the attestation to GitHub's database
contents: read
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: |
./autogen.sh
# Workaround for incorrect filesystem permissions on /usr/share/aclocal, which
# causes the m4 macros to be copied with incorrect permissions.
chmod u=rw,go=r m4/*.m4
- name: Configure
run: ./configure
- name: Distcheck
run: make distcheck -j3
- name: Manifest
run: |
mkdir tarball-dir
tar -C tarball-dir -xzf pcre2-*.tar.gz
# Budge the directory, so we don't bake the version number into the
# `manifest-tarball` file:
mv tarball-dir/pcre2-* tarball-dir/pcre2-SNAPSHOT
maint/RunManifestTest tarball-dir maint/manifest-tarball
- name: Upload to GitHub artifacts
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: "Distribution release"
path: |
pcre2-*.tar.bz2
pcre2-*.tar.gz
pcre2-*.zip
if-no-files-found: error
- name: Attest
uses: actions/attest-build-provenance@7668571508540a607bdfd90a87a560489fe372eb # v2.1.0
if: |
github.event_name != 'pull_request' &&
(startsWith(github.ref, 'refs/heads/release/') ||
startsWith(github.ref, 'refs/tags/pcre2-'))
with:
subject-path: 'pcre2-*.tar.bz2, pcre2-*.tar.gz, pcre2-*.zip'
coverage:
name: Code coverage
runs-on: ubuntu-latest
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install zlib1g-dev libbz2-dev libedit-dev
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: CC="clang -fprofile-instr-generate -fcoverage-mapping" cmake -DCMAKE_BUILD_TYPE=Debug -DPCRE2_DEBUG=OFF -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_SUPPORT_LIBZ=ON -DPCRE2_SUPPORT_LIBBZ2=ON -DPCRE2_SUPPORT_LIBEDIT=ON -DPCRE2_SUPPORT_LIBREADLINE=OFF -B build
- name: Build
run: cd build && make -j3
- name: Test
run: cd build && LLVM_PROFILE_FILE="coverage-%m.profraw" ctest -j1 --output-on-failure
- name: Report
run: |
LLVM_VER=`clang --version | head -n1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f1`
echo "Using LLVM version $LLVM_VER"
# Merge the profiles gathered
cd build
llvm-profdata-$LLVM_VER merge -sparse coverage-*.profraw -o coverage.profdata
# Output HTML, for archiving and browsing later
llvm-cov-$LLVM_VER show \
-format=html -output-dir=coverage-report -show-line-counts-or-regions -show-branches=percent \
-instr-profile=coverage.profdata \
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
../src/ ./
# Output LCOV-compatible output, for downstream tools
llvm-cov-$LLVM_VER export \
-format=lcov \
-instr-profile=coverage.profdata \
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
../src/ ./ \
> ./coverage-lcov.info
# Output text summary to build log
echo '```' > "$GITHUB_STEP_SUMMARY"
llvm-cov-$LLVM_VER report \
-instr-profile=coverage.profdata \
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
../src/ ./ \
>> "$GITHUB_STEP_SUMMARY"
echo '```' >> "$GITHUB_STEP_SUMMARY"
- name: Upload report to GitHub artifacts
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: "Coverage report"
path: './build/coverage-report'
if-no-files-found: error
- name: Upload report to Codecov
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e # v5.1.1
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
disable_search: true
files: ./build/coverage-lcov.info

View File

@@ -0,0 +1,31 @@
name: CIFuzz
on:
workflow_dispatch:
pull_request:
branches: [ master ]
permissions:
contents: read
jobs:
Fuzzing:
runs-on: ubuntu-latest
steps:
- name: Build Fuzzers
id: build
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@3d38acd485bc848e33396e7523b9a4f2aff9027e # master
with:
oss-fuzz-project-name: 'pcre2'
dry-run: false
- name: Run Fuzzers
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@3d38acd485bc848e33396e7523b9a4f2aff9027e # master
with:
oss-fuzz-project-name: 'pcre2'
fuzz-seconds: 300
dry-run: false
- name: Upload Crash
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts
path: ./out/artifacts

View File

@@ -0,0 +1,71 @@
name: Clang Static Analyzer
on:
workflow_dispatch:
push:
branches: [ master, "release/**" ]
pull_request:
branches: [ master ]
permissions:
contents: read
jobs:
Analyze:
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
contents: read
env:
# The @microsoft/sarif-multitool tool actually uses DotnetCore, which in
# turn aborts when it finds that GitHub's CI machine doesn't have ICU.
# Just turn off localisation. A future version of the ubuntu-24.04 or
# ubuntu-latest runners might not need this workaround.
DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: 1
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install ninja-build clang-tools
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: |
mkdir build
cd build
scan-build cmake -G Ninja -DPCRE2_SUPPORT_JIT=ON -DCMAKE_BUILD_TYPE=Debug ..
- name: Build
run: |
# Inefficiently run clang scan twice; once to generate HTML, and secondly
# to generate SARIF files. Ideally we would have some way to scan once and
# generate one of those outputs from the other, but I don't know a good way
# to do that.
cd build
scan-build -o clang-report/ ninja
ninja clean
scan-build -o clang-sarif -sarif ninja
# Work around issue in GitHub's SARIF ingestion - merge all SARIF files into one
npx -y @microsoft/sarif-multitool merge clang-sarif/*/*.sarif --output-file=clang.sarif
# Upload the browsable HTML report as an artifact.
- name: Upload report
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: "Clang Static Analyzer report"
path: './build/clang-report'
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
with:
sarif_file: build/clang.sarif
category: clang-analyzer

View File

@@ -0,0 +1,77 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"
on:
push:
branches: [ master, "release/**" ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master ]
schedule:
- cron: '27 6 * * 4'
permissions:
contents: read
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
actions: read
contents: read
strategy:
fail-fast: false
matrix:
language: [ 'cpp' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://git.io/codeql-language-support
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
#- run: |
# make bootstrap
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6

View File

@@ -0,0 +1,378 @@
name: Dev
on:
workflow_dispatch:
push:
branches: [ master, "release/**" ]
pull_request:
branches: [ master ]
permissions:
contents: read
jobs:
canary:
# Tests with: Debug & assertions; link-size=4; libedit
name: GCC -O0
runs-on: ubuntu-latest
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install libedit-dev
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure CC='gcc -fsanitize=undefined,address -fsanitize-undefined-trap-on-error' CFLAGS='-O0 -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --enable-pcre2test-libedit --with-link-size=4
- name: Build
run: make -j3
- name: Test (main test script)
run: ./RunTest
- name: Test (JIT test program)
run: ./pcre2_jit_test
- name: Test (pcre2grep test script)
run: ./RunGrepTest
- name: Test (pcre2posix program)
run: ./pcre2posix_test -v
dragon:
# Tests with: clang AB/UB; link-size=3
name: Clang
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
opt: ["-O0", "-O2"]
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure CC='clang -fsanitize=undefined,address,integer -fno-sanitize-recover=undefined,integer -fno-sanitize=unsigned-integer-overflow,unsigned-shift-base,function' CFLAGS='${{ matrix.opt }} -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter -Wno-error=deprecated-declarations -Wno-error=incompatible-library-redeclaration -Wno-error=incompatible-pointer-types-discards-qualifiers' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --with-link-size=3
- name: Build
run: make -j3
- name: Test (main test script)
run: |
ulimit -S -s 49152 # Raise stack limit; ASAN with -O0 is very stack-hungry
./RunTest
- name: Test (JIT test program)
run: ./pcre2_jit_test
- name: Test (pcre2grep test script)
run: ./RunGrepTest
- name: Test (pcre2posix program)
run: ./pcre2posix_test -v
greatawk:
# Tests with: GCC, -O3, oldest supported Ubuntu (in non-extended support)
#
# CMake Release build with JIT, all three code-unit widths, and both shared
# and static libraries; warnings are promoted to errors. After the tests pass,
# the install tree is compared against the CMake/Linux install manifest.
name: GCC -O3
# The ubuntu-20.04 hosted runner image has been retired by GitHub, so jobs
# requesting it can no longer be scheduled. ubuntu-22.04 is now the oldest
# image available, which keeps the "oldest supported Ubuntu" intent.
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DPCRE2_DEBUG=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
- name: Build
run: cd build && make -j3
- name: Test
run: cd build && ctest -j3 --output-on-failure
- name: Install
run: |
cd build
cmake --install . --prefix install-dir
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-linux
dodo:
# Tests with: Autoconf on oldest supported Ubuntu (in non-extended support)
#
# Autotools build at -Os with JIT, all three code-unit widths, and debug
# enabled. Runs `make check`, then verifies the staged `make install` tree
# against the Linux install manifest.
name: GCC -Os, old Autotools
# The ubuntu-20.04 hosted runner image has been retired by GitHub, so jobs
# requesting it can no longer be scheduled. ubuntu-22.04 is now the oldest
# image available, and therefore carries the oldest Autotools we can test.
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure CFLAGS='-Os -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug
- name: Build
run: make -j3
- name: Test
run: make check
- name: Install
run: |
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
wasp:
# Tests with: French locale; oldest supported CMake; no JIT; -Os; libreadline
name: GCC -Os, CMake+ninja, no JIT
runs-on: ubuntu-latest
env:
CMAKE_VER: "3.15.7"
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install language-pack-fr ninja-build libreadline-dev
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Cache CMake
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
key: cmake-${{ env.CMAKE_VER }}-Linux-x86_64
path: cmake-${{ env.CMAKE_VER }}-Linux-x86_64.tar.gz
- name: Install CMake
run: |
[ -f cmake-${CMAKE_VER}-Linux-x86_64.tar.gz ] || curl -L -S -O "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-Linux-x86_64.tar.gz"
tar -xz -f cmake-${CMAKE_VER}-Linux-x86_64.tar.gz
realpath "cmake-${CMAKE_VER}-Linux-x86_64/bin" >> "$GITHUB_PATH"
- name: Configure
run: |
cmake --version | grep "version ${CMAKE_VER}" || (echo "CMake version mismatch" && exit 1)
CC='clang' CFLAGS='-fsanitize=undefined,address,integer -fno-sanitize-recover=undefined,integer -fno-sanitize=unsigned-shift-base,function -pedantic -Wall -Wextra -Wpedantic -Wdeclaration-after-statement -Wshadow -Wno-overlength-strings -Werror -Wno-error=incompatible-pointer-types-discards-qualifiers' cmake -G Ninja -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DPCRE2_DEBUG=ON -DPCRE2_SUPPORT_LIBREADLINE=ON -DCMAKE_BUILD_TYPE=MinSizeRel -B build
- name: Build
run: ninja -C build
- name: Test
run: cd build && ctest -j3 --output-on-failure
- name: Install
run: |
cd build
cmake --install . --prefix install-dir
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-linux
bat:
# Tests with: MSVC 32-bit, and a variety of CMake options
name: Windows (Win32)
runs-on: windows-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2GREP_SUPPORT_CALLOUT_FORK=OFF -DPCRE2_DEBUG=ON -DPCRE2_NEWLINE=ANYCRLF -DPCRE2_STATIC_PIC=ON -DPCRE2_STATIC_RUNTIME=ON -DPCRE2_SUPPORT_BSR_ANYCRLF=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A Win32
- name: Build
run: cmake --build build --config RelWithDebInfo
- name: Test
run: cd build && ctest -C RelWithDebInfo -j3 --output-on-failure
pterodactyl:
# Tests with: MSVC 64-bit, Debug, shared libraries
name: Windows (x64)
runs-on: windows-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_DEBUG=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=OFF -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A x64
- name: Build
run: cmake --build build --config Debug
- name: Test
run: cd build && ctest -C Debug -j3 --output-on-failure
bigbird:
# Job to execute ManyConfigTests
name: manyconfig
runs-on: ubuntu-latest
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install -y valgrind
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Run
run: |
./autogen.sh
./maint/ManyConfigTests
camel:
# Job to execute RunPerlTest
#
# Runs inside the `perl:devel` container (so packages are installed without
# sudo). Builds with CMake + Ninja with the 8-bit library disabled and the
# 32-bit library enabled, runs the CTest suite, then prints the Perl version
# and runs maint/RunPerlTest.
name: perl
runs-on: ubuntu-latest
container: perl:devel
steps:
- name: Setup
run: |
apt-get -qq update
apt-get -qq install cmake ninja-build
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# Spelled `true` like every other job in this workflow: the documented
# values for this input are `true` and `recursive`, so `yes` is not a
# recognised setting.
submodules: true
- name: Configure
run: cmake -G Ninja -B build -DPCRE2_BUILD_PCRE2_8=OFF -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_NEVER_BACKSLASH_C=ON -DPCRE2_DEBUG=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
- name: Build
run: ninja -C build
- name: Test
run: |
cd build
ctest -j3 --output-on-failure
cd ..
perl -v
maint/RunPerlTest
chaffinch:
# Job to verify that the CMake "unity" build (single-file / jumbo build) passes.
# If this fails, it's usually because two different files define some file-static
# functions or macros which collide.
name: CMake unity build
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DCMAKE_UNITY_BUILD=ON -DCMAKE_UNITY_BUILD_BATCH_SIZE=0 -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_DEBUG=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
- name: Build
run: cd build && make -j3
- name: Test
run: cd build && ctest -j3 --output-on-failure
zebrilus:
# Tests with: Zig compiler
name: Zig
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
steps:
- name: Setup
run: |
sudo snap install zig --classic --beta
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Build
run: zig build
- name: Test
run: |
# Zig does something weird with the stack - it uses more space than the
# equivalent plain C program.
ulimit -S -s 16384
srcdir=`pwd` pcre2test=`pwd`/zig-out/bin/pcre2test ./RunTest
bazel:
# Tests with: Bazel build system
name: Bazel
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest", "windows-latest"]
runs-on: ${{ matrix.os }}
if: github.event_name != 'pull_request'
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Build
run: bazelisk build //... --enable_runfiles --incompatible_strict_action_env
- name: Test
run: bazelisk test //... --enable_runfiles --incompatible_strict_action_env --test_output=all
heron:
# Job to verify that the tasks performed by PrepareRelease have been done. It is
# the committer's responsibility (currently) to run PrepareRelease themselves when
# making a PR, so that everything is kept in-sync.
name: Check autogenerated file freshness
runs-on: ubuntu-24.04 # TODO: Update to ubuntu-latest when that switches to 24.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: false
fetch-depth: 0
fetch-tags: false
- name: PrepareRelease
run: maint/PrepareRelease
- name: 'Rebuild *.h.generic'
run: |
./autogen.sh && ./configure
rm -f src/*.generic
make src/config.h.generic src/pcre2.h.generic
# Workaround for incorrect filesystem permissions on /usr/share/aclocal, which
# causes the m4 macros to be copied with incorrect permissions.
chmod u=rw,go=r m4/*.m4
- name: Working directory clean
run: |
if [ -n "`git status --porcelain`" ] ; then
(
echo "Dirty working tree! Affected files:"
git status --porcelain || true
echo ""
echo "Diff:"
git diff || true
) >&2
exit 1
fi

View File

@@ -0,0 +1,58 @@
name: Scorecards supply-chain security
on:
workflow_dispatch:
# Only the default branch is supported.
branch_protection_rule:
schedule:
- cron: '23 17 * * 1'
push:
branches: [ master ]
permissions: read-all
jobs:
analysis:
name: Scorecards analysis
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
# Needed to publish the results to Scorecard's service.
id-token: write
actions: read
contents: read
steps:
- name: "Checkout code"
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # tag=v2.4.0
with:
results_file: results.sarif
results_format: sarif
# repo_token: ${{ secrets.GITHUB_TOKEN }}
# Publish the results to enable scorecard badges. For more details, see
# https://github.com/ossf/scorecard-action#publishing-results.
# For private repositories, `publish_results` will automatically be set to `false`,
# regardless of the value entered here.
publish_results: true
# Upload the results as artifacts (optional).
- name: "Upload artifact"
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: SARIF file
path: results.sarif
retention-days: 5
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
with:
sarif_file: results.sarif
category: ossf-scorecard

View File

@@ -0,0 +1,105 @@
# Public .gitignore file for PCRE2
build/
build-*/
*.a
*.gcda
*.gcno
*.lo
*.la
*.pc
*.o
*~
*-coverage*
__pycache__
.deps
.libs
Makefile
Makefile.in
RunGrepTest.log
RunGrepTest.trs
RunTest.log
RunTest.trs
aclocal.m4
ar-lib
autom4te.cache
compile
config.guess
config.log
config.status
config.sub
configure
depcomp
install-sh
libtool
ltmain.sh
missing
pcre2-config
pcre2_dftables
pcre2_jit_test
pcre2_jit_test.exe
pcre2_jit_test.log
pcre2_jit_test.trs
pcre2posix_test
pcre2posix_test.exe
pcre2posix_test.log
pcre2posix_test.trs
pcre2demo
pcre2fuzzcheck-*
pcre2fuzzer-*
pcre2grep
pcre2grep.exe
pcre2test
pcre2test.exe
test-driver
test-suite.log
test3input
test3output
test3outputA
test3outputB
testNinput
testNinputgrep
teststderr
teststderrM
teststderrgrep
teststdout
teststdoutM
testtemp1
testtemp1grep
testtemp2
testtemp2grep
testtry
testtry2
testtrygrep
testSinput
testbtables
testsaved1
testsaved2
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
src/.deps
src/.dirstamp
src/config.h
src/config.h.in
src/pcre2.h
src/pcre2_chartables.c
src/stamp-h1
/bazel-*
*.bazel.lock
zig-out/
zig-cache/
.zig-cache/
# End

View File

@@ -0,0 +1,3 @@
[submodule "deps/sljit"]
path = deps/sljit
url = https://github.com/zherczeg/sljit.git

View File

@@ -0,0 +1,200 @@
PCRE2 Authorship and Contributors
=================================
COPYRIGHT
---------
Please see the file [LICENCE](./LICENCE.md) in the PCRE2 distribution for
copyright details.
MAINTAINERS
-----------
The PCRE and PCRE2 libraries were authored and maintained by Philip Hazel.
Since 2024, the contributors with administrator access to the project are now
Nicholas Wilson and Zoltán Herczeg. See the file [SECURITY](./SECURITY.md) for
GPG keys.
Both administrators are volunteers acting in a personal capacity.
<table>
<thead>
<tr>
<th>Name</th>
<th>Role</th>
</tr>
</thead>
<tbody>
<tr>
<td>
Nicholas Wilson<br/>
`nicholas@nicholaswilson.me.uk`<br/>
Currently of Microsoft Research Cambridge, UK
</td>
<td>
* General project administration & maintenance
* Release management
* Code maintenance
</td>
</tr>
<tr>
<td>
Zoltán Herczeg<br/>
`hzmester@freemail.hu`<br/>
Currently of the University of Szeged, Hungary
</td>
<td>
* Code maintenance
* Ownership of `sljit` and PCRE2's JIT
</td>
</tr>
</tbody>
</table>
CONTRIBUTORS
------------
Many others have participated and contributed to PCRE2 over its history.
The maintainers are grateful for all contributions and participation over the
years. We apologise for any names we have forgotten.
We are especially grateful to Philip Hazel, creator of PCRE and PCRE2, and
maintainer from 1997 to 2024.
All names listed alphabetically.
### Contributors to PCRE2
This list includes names up until the PCRE2 10.44 release. New names will be
added from the Git history on each release.
Scott Bell
Carlo Marcelo Arenas Belón
Edward Betts
Jan-Willem Blokland
Ross Burton
Dmitry Cherniachenko
Alexey Chupahin
Jessica Clarke
Alejandro Colomar
Jeremie Courreges-Anglas
Addison Crump
Alex Dowad
Daniel Engberg
Daniel Richard G
David Gaussmann
Andrey Gorbachev
Jordan Griege
Jason Hood
Bumsu Hyeon
Roy Ivy
Martin Joerg
Guillem Jover
Ralf Junker
Ayesh Karunaratne
Michael Kaufmann
Yunho Kim
Joshua Kinard
David Korczynski
Uwe Korn
Jonas Kvinge
Kristian Larsson
Kai Lu
Behzod Mansurov
B. Scott Michel
Nathan Moinvaziri
Mike Munday
Marc Mutz
Fabio Pagani
Christian Persch
Tristan Ross
William A Rowe Jr
David Seifert
Yaakov Selkowitz
Rich Siegel
Karl Skomski
Maciej Sroczyński
Wolfgang Stöggl
Thomas Tempelmann
Greg Thain
Lucas Trzesniewski
Theodore Tsirpanis
Matthew Vernon
Rémi Verschelde
Thomas Voss
Ezekiel Warren
Carl Weaver
Chris Wilson
Amin Yahyaabadi
Joe Zhang
### Contributors to PCRE1
These people contributed either by sending patches or reporting serious issues.
Irfan Adilovic
Alexander Barkov
Daniel Bergström
David Burgess
Ross Burton
David Byron
Fred Cox
Christian Ehrlicher
Tom Fortmann
Lionel Fourquaux
Mike Frysinger
Daniel Richard G
Dair Gran
"Graycode" (Red Hat Product Security)
Viktor Griph
Wen Guanxing
Robin Houston
Martin Jerabek
Peter Kankowski
Stephen Kelly
Yunho Kim
Joshua Kinard
Carsten Klein
Evgeny Kotkov
Ronald Landheer-Cieslak
Alan Lehotsky
Dmitry V. Levin
Nuno Lopes
Kai Lu
Giuseppe Maxia
Dan Mooney
Marc Mutz
Markus Oberhumer
Sheri Pierce
Petr Pisar
Ari Pollak
Bob Rossi
Ruiger Rill
Michael Shigorin
Rich Siegel
Craig Silverstein (C++ wrapper)
Karl Skomski
Paul Sokolovsky
Stan Switzer
Ian Taylor
Mark Tetrode
Jeff Trawick
Steven Van Ingelgem
Lawrence Velazquez
Jiong Wang
Stefan Weber
Chris Wilson
Thanks go to Jeffrey Friedl for testing and debugging assistance.

View File

@@ -0,0 +1,172 @@
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
load("@bazel_skylib//rules:native_binary.bzl", "native_test")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
# Copy the generic configuration header to src/config.h. The cc_* targets in
# this file pick it up by listing ":config_h_generic" in their srcs.
copy_file(
    name = "config_h_generic",
    out = "src/config.h",
    src = "src/config.h.generic",
)
# Copy the generic public header to src/pcre2.h. The ":pcre2" library exports
# the copied file through its hdrs attribute.
copy_file(
    name = "pcre2_h_generic",
    out = "src/pcre2.h",
    src = "src/pcre2.h.generic",
)
# Copy the distributed character tables source to src/pcre2_chartables.c. The
# ":pcre2" library lists the copied file in its srcs.
copy_file(
    name = "pcre2_chartables_c",
    out = "src/pcre2_chartables.c",
    src = "src/pcre2_chartables.c.dist",
)
# Removed src/pcre2_ucptables.c below because it is #included in
# src/pcre2_tables.c. Also fixed typo: ckdint should be chkdint.
# PH, 22-March-2023.
# The main PCRE2 library target, configured through local_defines for 8-bit
# code units (PCRE2_CODE_UNIT_WIDTH=8) with SUPPORT_UNICODE set.
cc_library(
    name = "pcre2",
    srcs = [
        # Compilation units.
        "src/pcre2_auto_possess.c",
        "src/pcre2_chkdint.c",
        "src/pcre2_compile.c",
        "src/pcre2_compile_class.c",
        "src/pcre2_config.c",
        "src/pcre2_context.c",
        "src/pcre2_convert.c",
        "src/pcre2_dfa_match.c",
        "src/pcre2_error.c",
        "src/pcre2_extuni.c",
        "src/pcre2_find_bracket.c",
        "src/pcre2_jit_compile.c",
        "src/pcre2_maketables.c",
        "src/pcre2_match.c",
        "src/pcre2_match_data.c",
        "src/pcre2_newline.c",
        "src/pcre2_ord2utf.c",
        "src/pcre2_pattern_info.c",
        "src/pcre2_script_run.c",
        "src/pcre2_serialize.c",
        "src/pcre2_string_utils.c",
        "src/pcre2_study.c",
        "src/pcre2_substitute.c",
        "src/pcre2_substring.c",
        "src/pcre2_tables.c",
        "src/pcre2_ucd.c",
        "src/pcre2_valid_utf.c",
        "src/pcre2_xclass.c",
        # Output of the copy_file rule for src/pcre2_chartables.c.dist.
        ":pcre2_chartables_c",
        # Private headers (not exported to dependents).
        "src/pcre2_compile.h",
        "src/pcre2_internal.h",
        "src/pcre2_intmodedep.h",
        "src/pcre2_ucp.h",
        "src/pcre2_util.h",
        # Output of the copy_file rule for src/config.h.generic.
        ":config_h_generic",
    ],
    # Public header: the copied src/pcre2.h.
    hdrs = [
        ":pcre2_h_generic",
    ],
    # .c files that are #included by other sources instead of being compiled
    # as separate units, so they are declared as textual headers.
    textual_hdrs = [
        "src/pcre2_jit_match.c",
        "src/pcre2_jit_misc.c",
        "src/pcre2_ucptables.c",
    ],
    includes = ["src"],
    local_defines = [
        "HAVE_CONFIG_H",
        "HAVE_MEMMOVE",
        "PCRE2_CODE_UNIT_WIDTH=8",
        "PCRE2_STATIC",
        "SUPPORT_UNICODE",
    ],
    strip_include_prefix = "src",
    visibility = ["//visibility:public"],
)
# POSIX wrapper library: compiles src/pcre2posix.c, exports src/pcre2posix.h,
# and depends on the ":pcre2" library. It uses the same local_defines as
# ":pcre2".
cc_library(
    name = "pcre2-posix",
    srcs = [
        "src/pcre2posix.c",
        # Output of the copy_file rule for src/config.h.generic.
        ":config_h_generic",
    ],
    hdrs = [
        "src/pcre2posix.h",
    ],
    includes = ["src"],
    local_defines = [
        "HAVE_CONFIG_H",
        "HAVE_MEMMOVE",
        "PCRE2_CODE_UNIT_WIDTH=8",
        "PCRE2_STATIC",
        "SUPPORT_UNICODE",
    ],
    strip_include_prefix = "src",
    visibility = ["//visibility:public"],
    deps = [":pcre2"],
)
# Totally weird issue in Bazel. It won't let you #include any files unless they
# are declared to the build system. OK, fair enough. But - for a cc_binary it
# uses the file extension to determine whether it's a header or a compilation
# unit. But... we have several .c files which are #included, rather than treated
# as a compilation unit.
#
# For cc_library() above, we can overcome this with textual_hdrs. But that
# doesn't work for cc_binary(). Here's our workaround.
#
# https://github.com/bazelbuild/bazel/issues/680
# Header-only helper target: declares the .c files listed below as hdrs so
# that the ":pcre2test" binary, which depends on this target, may #include
# them. Private to this package.
cc_library(
    name = "pcre2test_dotc_headers",
    hdrs = [
        "src/pcre2_chkdint.c",
        "src/pcre2_printint.c",
        "src/pcre2_tables.c",
        "src/pcre2_ucd.c",
        "src/pcre2_valid_utf.c",
    ],
    strip_include_prefix = "src",
    visibility = ["//visibility:private"],
)
# The pcre2test program, built from src/pcre2test.c and linked against the
# ":pcre2" and ":pcre2-posix" libraries.
cc_binary(
    name = "pcre2test",
    srcs = [
        "src/pcre2test.c",
        # Output of the copy_file rule for src/config.h.generic.
        ":config_h_generic",
    ],
    # HAVE_UNISTD_H is added on every platform except Windows.
    local_defines = [
        "HAVE_CONFIG_H",
        "HAVE_MEMMOVE",
        "HAVE_STRERROR",
        "PCRE2_STATIC",
        "SUPPORT_UNICODE",
        "SUPPORT_PCRE2_8",
    ] + select({
        "@platforms//os:windows": [],
        "//conditions:default": ["HAVE_UNISTD_H"],
    }),
    # An extra linker option is passed on Windows only.
    linkopts = select({
        "@platforms//os:windows": ["-STACK:2500000"],
        "//conditions:default": [],
    }),
    visibility = ["//visibility:public"],
    deps = [
        ":pcre2test_dotc_headers",
        ":pcre2",
        ":pcre2-posix",
    ],
)
# Every file directly under testdata/, grouped so that the ":pcre2_test" rule
# can reference them as data.
filegroup(
    name = "testdata",
    srcs = glob(
        ["testdata/*"],
    ),
)
# Test target that runs the RunTest script (RunTest.bat on Windows), with the
# ":pcre2test" binary and the ":testdata" files supplied as data.
native_test(
    name = "pcre2_test",
    size = "small",
    # Script to execute, chosen per platform.
    src = select({
        "@platforms//os:windows": "RunTest.bat",
        "//conditions:default": "RunTest",
    }),
    # Output name; mirrors the per-platform choice made for src.
    out = select({
        "@platforms//os:windows": "RunTest.bat",
        "//conditions:default": "RunTest",
    }),
    data = [
        ":pcre2test",
        ":testdata",
    ],
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
PCRE2 LICENCE
Please see the file LICENCE in the PCRE2 distribution for licensing details.
End

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,924 @@
Technical notes about PCRE2
---------------------------
These are very rough technical notes that record potentially useful information
about PCRE2 internals. PCRE2 is a library based on the original PCRE library,
but with a revised (and incompatible) API. To avoid confusion, the original
library is referred to as PCRE1 below. For information about testing PCRE2, see
the pcre2test documentation and the comment at the head of the RunTest file.
PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix
releases carried on the 8.xx series, up to the final 8.45 release. PCRE2
releases started at 10.00 to avoid confusion with PCRE1.
Historical note 1
-----------------
Many years ago I implemented some regular expression functions to an algorithm
suggested by Martin Richards. The rather simple patterns were not Unix-like in
form, and were quite restricted in what they could do by comparison with Perl.
The interesting part about the algorithm was that the amount of space required
to hold the compiled form of an expression was known in advance. The code to
apply an expression did not operate by backtracking, as the original Henry
Spencer code and the current PCRE2 pcre2_match() function and Perl code do, but
instead checked all possibilities simultaneously by keeping a list of current
states and checking all of them as it advanced through the subject string. In
the terminology of Jeffrey Friedl's book, it was a "DFA algorithm", though it
was not a traditional Finite State Machine (FSM). When the pattern was all used
up, all remaining states were possible matches, and the one matching the
longest subset of the subject string was chosen. This did not necessarily
maximize the individual wild portions of the pattern, as is expected in Unix
and Perl-style regular expressions.
Historical note 2
-----------------
The code originally written by Henry Spencer (which was subsequently heavily
modified for Perl) compiles the expression twice: once in a dummy mode in order
to find out how much store will be needed, and then for real. (The Perl version
may or may not still do this; I'm talking about the original library.) The
execution function operates by backtracking and maximizing (or, optionally,
minimizing, in Perl) the amount of the subject that matches individual wild
portions of the pattern. This is an "NFA algorithm" in Friedl's terminology.
Support for 16-bit and 32-bit data strings
-------------------------------------------
The PCRE2 library can be compiled in any combination of 8-bit, 16-bit or 32-bit
modes, creating up to three different libraries. In the description that
follows, the word "short" is used for a 16-bit data quantity, and the phrase
"code unit" is used for a quantity that is a byte in 8-bit mode, a short in
16-bit mode and a 32-bit word in 32-bit mode. The names of PCRE2 functions are
given in generic form, without the _8, _16, or _32 suffix.
Computing the memory requirement: how it was
--------------------------------------------
Up to and including release 6.7, PCRE1 worked by running a very degenerate
first pass to calculate a maximum memory requirement, and then a second pass to
do the real compile - which might use a bit less than the predicted amount of
memory. The idea was that this would turn out faster than the Henry Spencer
code because the first pass is degenerate and the second pass can just store
stuff straight into memory, which it knows is big enough.
Computing the memory requirement: how it is
-------------------------------------------
By the time I was working on a potential 6.8 release, the degenerate first pass
had become very complicated and hard to maintain. Indeed one of the early
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
I had a flash of inspiration as to how I could run the real compile function in
a "fake" mode that enables it to compute how much memory it would need, while
in most cases only ever using a small amount of working memory, and without too
many tests of the mode that might slow it down. So I refactored the compiling
functions to work this way. This got rid of about 600 lines of source and made
further maintenance and development easier. As this was such a major change, I
never released 6.8, instead upping the number to 7.0 (other quite major changes
were also present in the 7.0 release).
A side effect of this work was that the previous limit of 200 on the nesting
depth of parentheses was removed. However, there was a downside: compiling ran
more slowly than before (30% or more, depending on the pattern) because it now
did a full analysis of the pattern twice. My hope was that this would not be a
big issue, and in the event, nobody has commented on it.
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
(default 250, settable at build time) so as to put a limit on the amount of
system stack used by the compile function, which uses recursive function calls
for nested parenthesized groups. This is a safety feature for environments with
small stacks where the patterns are provided by users.
Yet another pattern scan
------------------------
History repeated itself for PCRE2 release 10.20. A number of bugs relating to
named subpatterns had been discovered by fuzzers. Most of these were related to
the handling of forward references when it was not known if the named group was
unique. (References to non-unique names use a different opcode and more
memory.) The use of duplicate group numbers (the (?| facility) also caused
issues.
To get around these problems I adopted a new approach by adding a third pass
over the pattern (really a "pre-pass"), which does nothing other than identify
all the named subpatterns and their corresponding group numbers. This means
that the actual compile (both the memory-computing dummy run and the real
compile) has full knowledge of group names and numbers throughout. Several
dozen lines of messy code were eliminated, though the new pre-pass was not
short. In particular, parsing and skipping over [] classes is complicated.
While working on 10.22 I realized that I could simplify yet again by moving
more of the parsing into the pre-pass, thus avoiding doing it in two places, so
after 10.22 was released, the code underwent yet another big refactoring. This
is how it is from 10.23 onwards:
The function called parse_regex() scans the pattern characters, parsing them
into literal data and meta characters. It converts escapes such as \x{123}
into literals, handles \Q...\E, and skips over comments and non-significant
white space. The result of the scanning is put into a vector of 32-bit unsigned
integers. Values less than 0x80000000 are literal data. Higher values represent
meta-characters. The top 16 bits of such values identify the meta-character,
and these are given names such as META_CAPTURE. The lower 16 bits are available
for data, for example, the capturing group number. The only situation in which
literal data values greater than 0x7fffffff can appear is when the 32-bit
library is running in non-UTF mode. This is handled by having a special
meta-character that is followed by the 32-bit data value.
The size of the parsed pattern vector, when auto-callouts are not enabled, is
bounded by the length of the pattern (with one exception). The code is written
so that each item in the pattern uses no more vector elements than the number
of code units in the item itself. The exception is the aforementioned large
32-bit number handling. For this reason, 32-bit non-UTF patterns are scanned in
advance to check for such values. When auto-callouts are enabled, the generous
assumption is made that there will be a callout for each pattern code unit
(which of course is only actually true if all code units are literals) plus one
at the end. A default parsed pattern vector is defined on the system stack, to
minimize memory handling, but if this is not big enough, heap memory is used.
If there are any lookbehinds in the pattern, the parsed pattern is scanned in
order to work out their lengths. Then the actual compiling function is run
twice, the first time to determine the amount of memory needed for the final
compiled pattern. The compiling function processes the parsed pattern vector,
not the pattern itself, although some of the parsed items refer to strings in
the pattern - for example, group names.
Some post-processing of the compiled pattern takes place. If there are any
recursion or subroutine calls, there is a scan to convert them into offsets.
Then there are other scans to apply certain optimizations, some of which can be
disabled by setting appropriate options.
Most errors can be diagnosed during the parsing scan. For those that cannot,
the parsed code contains offsets into the pattern so that the actual compiling
code can report where the errors are.
The elements of the parsed pattern vector
-----------------------------------------
The word "offset" below means a code unit offset into the pattern. When
PCRE2_SIZE (which is usually size_t) is no bigger than uint32_t, an offset is
stored in a single parsed pattern element. Otherwise (typically on 64-bit
systems) it occupies two elements. The following meta items occupy just one
element, with no data:
META_ACCEPT (*ACCEPT)
META_ASTERISK *
META_ASTERISK_PLUS *+
META_ASTERISK_QUERY *?
META_ATOMIC (?> start of atomic group
META_CIRCUMFLEX ^ metacharacter
META_CLASS [ start of non-empty class
META_CLASS_EMPTY [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS
META_CLASS_EMPTY_NOT [^] negative empty class - ditto
META_CLASS_END ] end of non-empty class
META_CLASS_NOT [^ start non-empty negative class
META_COMMIT (*COMMIT) - no argument (see below for with argument)
META_COND_ASSERT (?(?assertion)
META_DOLLAR $ metacharacter
META_DOT . metacharacter
META_END End of pattern (this value is 0x80000000)
META_FAIL (*FAIL)
META_KET ) closing parenthesis
META_LOOKAHEAD (?= start of lookahead
META_LOOKAHEAD_NA (*napla: start of non-atomic lookahead
META_LOOKAHEADNOT (?! start of negative lookahead
META_NOCAPTURE (?: no capture parens
META_PLUS +
META_PLUS_PLUS ++
META_PLUS_QUERY +?
META_PRUNE (*PRUNE) - no argument (see below for with argument)
META_QUERY ?
META_QUERY_PLUS ?+
META_QUERY_QUERY ??
META_RANGE_ESCAPED hyphen in class range with at least one escape
META_RANGE_LITERAL hyphen in class range defined literally
META_SKIP (*SKIP) - no argument (see below for with argument)
META_THEN (*THEN) - no argument (see below for with argument)
META_ECLASS_AND && (or &) in an extended character class
META_ECLASS_OR || (or |, +) in an extended character class
META_ECLASS_SUB -- (or -) in an extended character class
META_ECLASS_XOR ~~ (or ^) in an extended character class
META_ECLASS_NOT ! in an extended character class
The two RANGE values occur only in character classes. They are positioned
between two literals that define the start and end of the range. In an EBCDIC
environment it is necessary to know whether either of the range values was
specified as an escape. In an ASCII/Unicode environment the distinction is not
relevant.
The following have data in the lower 16 bits, and may be followed by other data
elements:
META_ALT | alternation
META_BACKREF back reference
META_CAPTURE start of capturing group
META_ESCAPE non-literal escape sequence
META_RECURSE recursion call
If the data for META_ALT is non-zero, it is inside a lookbehind, and the data
is the maximum length of its branch (see META_LOOKBEHIND below for more
detail).
META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as
their data in the lower 16 bits of the element. META_RECURSE is followed by an
offset, for use in error messages.
META_BACKREF is followed by an offset if the back reference group number is 10
or more. The offsets of the first occurrences of references to groups whose
numbers are less than 10 are put in cb->small_ref_offset[] (only the first
occurrence is useful). On 64-bit systems this avoids using more than two parsed
pattern elements for items such as \3. The offset is used when an error occurs
because the reference is to a non-existent group.
META_ESCAPE is used for escapes such as \d that match a character. It has an
ESC_xxx value as its data. For ESC_P and ESC_p, the next element contains the
16-bit type and data property values, packed together. Escape sequences such as
\g and \k are turned into other items like META_RECURSE or META_BACKREF and
their ESC_xxx values never occur with META_ESCAPE.
The following have one data item that follows in the next vector element:
META_BIGVALUE Next is a literal >= META_END
META_POSIX POSIX class item (data identifies the class)
META_POSIX_NEG negative POSIX class item (ditto)
The following are followed by a length element, then a number of character code
values (which should match with the length):
META_MARK (*MARK:xxxx)
META_COMMIT_ARG (*COMMIT:xxxx)
META_PRUNE_ARG (*PRUNE:xxxx)
META_SKIP_ARG (*SKIP:xxxx)
META_THEN_ARG (*THEN:xxxx)
The following are followed by a length element, then an offset in the pattern
that identifies the name:
META_COND_NAME (?(<name>) or (?('name') or (?(name)
META_COND_RNAME (?(R&name)
META_COND_RNUMBER (?(Rdigits)
META_RECURSE_BYNAME (?&name)
META_BACKREF_BYNAME \k'name' or \k<name> or \k{name} or \g{name}
META_SCS_NAME (*scs:(<name>)...)
META_COND_RNUMBER is used for names that start with R and continue with digits,
because this is an ambiguous case. It could be a back reference to a group with
that name, or it could be a recursion test on a numbered group.
These are followed by an offset, for use in error messages, then a number:
META_COND_NUMBER (?([+-]digits)
META_SCS_NUMBER (*scs:(digits)...)
The following is followed just by an offset, for use in error messages:
META_COND_DEFINE (?(DEFINE)
The following are at first also followed just by an offset for use in error
messages. After the lengths of the branches of a lookbehind group have been
checked the error offset is no longer needed. The lower 16 bits of the main
word are now set to the maximum length of the first branch of the lookbehind
group, and the second word is set to the minimum matching length for a
variable-length lookbehind group, or to LOOKBEHIND_MAX for a group whose
branches are all of fixed length. These values are used when generating
OP_REVERSE or OP_VREVERSE for the first branch. The minimum value is also used
for any subsequent branches because there is only room for one value (the
branch maximum length) in a META_ALT item.
META_LOOKBEHIND (?<= start of lookbehind
META_LOOKBEHIND_NA (*naplb: start of non-atomic lookbehind
META_LOOKBEHINDNOT (?<! start of negative lookbehind
The following are followed by two elements, the minimum and maximum. The
maximum value is limited to 65535 (MAX_REPEAT_COUNT). A maximum value of
"unlimited" is represented by REPEAT_UNLIMITED, which is bigger than it:
META_MINMAX {n,m} repeat
META_MINMAX_PLUS {n,m}+ repeat
META_MINMAX_QUERY {n,m}? repeat
This one is followed by two elements, giving the new option settings for the
main and extra options, respectively.
META_OPTIONS (?i) and friends
This one is followed by three elements. The first is 0 for '=' and 1 for '>=';
the next two are the major and minor numbers:
META_COND_VERSION (?(VERSION<op>x.y)
Callouts are converted into one of two items:
META_CALLOUT_NUMBER (?C with numerical argument
META_CALLOUT_STRING (?C with string argument
In both cases, the next two elements contain the offset and length of the next
item in the pattern. Then there is either one callout number, or a length and
an offset for the string argument. The length includes both delimiters.
Traditional matching function
-----------------------------
The "traditional", and original, matching function is called pcre2_match(), and
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
and the way that Perl works. This is not surprising, since it is intended to be
as compatible with Perl as possible. This is the function most users of PCRE2
will use most of the time. If PCRE2 is compiled with just-in-time (JIT)
support, and studying a compiled pattern with JIT is successful, the JIT code
is run instead of the normal pcre2_match() code, but the result is the same.
The interpreter used to implement backtracking by means of recursive function
calls, but this gave rise to regular complaints when patterns with large search
trees ran out of stack. There was for a while a fudge that used the heap
instead, but this was inefficient and slow. In 2017 I re-wrote pcre2_match() as
a single, non-recursive function that implements backtracking via a vector of
"frames" on the heap, each frame representing a backtracking point. As well as
standard information such as the position in the pattern and position in the
subject, each frame has a number of unassigned variables that can be used
locally to preserve values at a backtracking point. C macros are used
extensively to implement all of this.
Supplementary matching function
-------------------------------
There is a supplementary matching function called pcre2_dfa_match() that
implements a DFA matching algorithm that searches simultaneously for all
possible matches that start at one point in the subject string. (Going back to
my roots: see Historical Note 1 above.) This function interprets the same
compiled pattern data as pcre2_match(); however, not all the facilities are
available, and those that are do not always work in quite the same way. In
particular, capturing parentheses and backreferences are not supported. See the
user documentation for details.
The algorithm that is used for pcre2_dfa_match() is not a traditional FSM,
because it may have a number of states active at one time. More work would be
needed at compile time to produce a traditional FSM where only one state is
ever active at once. I believe some other regex matchers work this way. JIT
support is not available for this kind of matching.
Changeable options
------------------
The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and
some others may be changed in the middle of patterns by items such as (?i).
Their processing is handled entirely at compile time by generating different
opcodes for the different settings. Some options are copied into the opcode's
data, for opcodes such as OP_REFI which depends on the (?r)
(PCRE2_EXTRA_CASELESS_RESTRICT) option. The runtime functions do not need to
keep track of an option's state.
PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
are tracked and processed during the parsing pre-pass. The others are handled
from META_OPTIONS items during the main compile phase.
Format of compiled patterns
---------------------------
The compiled form of a pattern is a vector of unsigned code units (bytes in
8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing
items of variable length. The first code unit in an item contains an opcode,
and the length of the item is either implicit in the opcode or contained in the
data that follows it.
In many cases listed below, LINK_SIZE data values are specified for offsets
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
default value for LINK_SIZE is 2, except for the 32-bit library, where it can
only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values,
and the 16-bit library can be compiled to use 4-byte values, though this
impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries
is necessary only when patterns whose compiled length is greater than 65535
code units are going to be processed. When a LINK_SIZE value uses more than one
code unit, the most significant unit is first.
In this description, we assume the "normal" compilation options. Data values
that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
(most significant byte first), and one code unit in 16-bit and 32-bit modes.
Opcodes with no following data
------------------------------
These items are all just one code unit long:
OP_END end of pattern
OP_ANY match any one character other than newline
OP_ALLANY match any one character, including newline
OP_ANYBYTE match any single code unit, even in UTF-8/16 mode
OP_SOD match start of data: \A
OP_SOM start of match (subject + offset): \G
OP_SET_SOM set start of match (\K)
OP_CIRC ^ (start of data)
OP_CIRCM ^ multiline mode (start of data or after newline)
OP_NOT_WORD_BOUNDARY \B
OP_WORD_BOUNDARY \b
OP_NOT_DIGIT \D
OP_DIGIT \d
OP_NOT_HSPACE \H
OP_HSPACE \h
OP_NOT_WHITESPACE \S
OP_WHITESPACE \s
OP_NOT_VSPACE \V
OP_VSPACE \v
OP_NOT_WORDCHAR \W
OP_WORDCHAR \w
OP_EODN match end of data or newline at end: \Z
OP_EOD match end of data: \z
OP_DOLL $ (end of data, or before final newline)
OP_DOLLM $ multiline mode (end of data or before newline)
OP_EXTUNI match an extended Unicode grapheme cluster
OP_ANYNL match any Unicode newline sequence
OP_ASSERT_ACCEPT )
OP_ACCEPT ) These are Perl 5.10's "backtracking control
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
OP_FAIL ) parentheses, it may be preceded by one or more
OP_PRUNE ) OP_CLOSE, each followed by a number that
OP_SKIP ) indicates which parentheses must be closed.
OP_THEN )
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
This ends the assertion, not the entire pattern match. The assertion (?!) is
always optimized to OP_FAIL.
OP_ALLANY is used for '.' when PCRE2_DOTALL is set. It is also used for \C in
non-UTF modes and in UTF-32 mode (since one code unit still equals one
character). Another use is for [^] when empty classes are permitted
(PCRE2_ALLOW_EMPTY_CLASS is set).
Backtracking control verbs
--------------------------
Verbs with no arguments generate opcodes with no following data (as listed
in the section above).
(*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a
length in one code unit, and followed by a binary zero. The name length is
limited by the size of the code unit.
(*ACCEPT:NAME) and (*FAIL:NAME) are compiled as (*MARK:NAME)(*ACCEPT) and
(*MARK:NAME)(*FAIL) respectively.
For (*COMMIT:NAME), (*PRUNE:NAME), (*SKIP:NAME), and (*THEN:NAME), the opcodes
OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the
name following in the same format as for OP_MARK.
Matching literal characters
---------------------------
The OP_CHAR opcode is followed by a single character that is to be matched
casefully. For caseless matching of characters that have at most two
case-equivalent code points, OP_CHARI is used. In UTF-8 or UTF-16 modes, the
character may be more than one code unit long. In UTF-32 mode, characters are
always exactly one code unit long.
If there is only one character in a character class, OP_CHAR or OP_CHARI is
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
for something like [^a]).
Caseless matching (positive or negative) of characters that have more than two
case-equivalent code points (which is possible only in UTF mode) is handled by
compiling a Unicode property item (see below), with the pseudo-property
PT_CLIST. The value of this property is an offset in a vector called
"ucd_caseless_sets" which identifies the start of a short list of case
equivalent characters, terminated by the value NOTACHAR (0xffffffff).
Repeating single characters
---------------------------
The common repeats (*, +, ?), when applied to a single character, use the
following opcodes, which come in caseful and caseless versions:
Caseful Caseless
OP_STAR OP_STARI
OP_MINSTAR OP_MINSTARI
OP_POSSTAR OP_POSSTARI
OP_PLUS OP_PLUSI
OP_MINPLUS OP_MINPLUSI
OP_POSPLUS OP_POSPLUSI
OP_QUERY OP_QUERYI
OP_MINQUERY OP_MINQUERYI
OP_POSQUERY OP_POSQUERYI
Each opcode is followed by the character that is to be repeated. In ASCII or
UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the
length is variable. Those with "MIN" in their names are the minimizing
versions. Those with "POS" in their names are possessive versions. Other kinds
of repeat make use of these opcodes:
Caseful Caseless
OP_UPTO OP_UPTOI
OP_MINUPTO OP_MINUPTOI
OP_POSUPTO OP_POSUPTOI
OP_EXACT OP_EXACTI
Each of these is followed by a count and then the repeated character. The count
is two bytes long in 8-bit mode (most significant byte first), or one code unit
in 16-bit and 32-bit modes.
OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum
and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or
OP_MINUPTO or OP_POSUPTO).
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
etc.) are used for repeated, negated, single-character classes such as [^a]*.
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
positive single-character classes.
Repeating character types
-------------------------
Repeats of things like \d are done exactly as for single characters, except
that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored
in the next code unit. The opcodes are:
OP_TYPESTAR
OP_TYPEMINSTAR
OP_TYPEPOSSTAR
OP_TYPEPLUS
OP_TYPEMINPLUS
OP_TYPEPOSPLUS
OP_TYPEQUERY
OP_TYPEMINQUERY
OP_TYPEPOSQUERY
OP_TYPEUPTO
OP_TYPEMINUPTO
OP_TYPEPOSUPTO
OP_TYPEEXACT
Match by Unicode property
-------------------------
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
character by testing its Unicode property (the \p and \P escape sequences).
Each is followed by two code units that encode the desired property as a type
and a value. The types are a set of #defines of the form PT_xxx, and the values
are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
The value is relevant only for PT_GC (General Category), PT_PC (Particular
Category), PT_SC (Script), PT_BIDICL (Bidi Class), PT_BOOL (Boolean property),
and the pseudo-property PT_CLIST, which is used to identify a list of
case-equivalent characters when there are three or more (see above).
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
value.
Character classes
-----------------
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
something like [^a]), except when caselessly matching a character that has more
than two case-equivalent code points (which can happen only in UTF mode). In
this case a Unicode property item is used, as described above in "Matching
literal characters".
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
negated, single-character classes. The normal single-character opcodes
(OP_STAR, etc.) are used for repeated positive single-character classes.
When there is more than one character in a class, and all the code points are
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
negative one. In either case, the opcode is followed by a 32-byte (16-short,
8-word) bit map containing a 1 bit for every character that is acceptable. The
bits are counted from the least significant end of each unit. In caseless mode,
bits for both cases are set.
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and
16-bit and 32-bit modes, subject characters with values greater than 255 can be
handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they
do.
For classes containing characters with values greater than 255 or that contain
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable
code points are less than 256. After the bit map, the properties of the
character class are listed, if they are present. The items in the list
follow the declaration order of the pattern string. The property list
is followed by single characters and/or character ranges, if they are
present. The characters/ranges are sorted in ascending order, and at
least one non-matching character must be present between any two of
them. In caseless mode, all equivalent characters are explicitly listed.
OP_XCLASS is followed by a LINK_SIZE value containing the total length of the
opcode and its data. This is followed by a code unit containing flag bits:
XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a
bit map is present. There follows the bit map, if XCL_MAP is set, and then a
sequence of items coded as follows:
XCL_END marks the end of the list
XCL_SINGLE one character follows
XCL_RANGE two characters follow
XCL_PROP a Unicode property (type, value) follows
XCL_NOTPROP a Unicode property (type, value) follows
If a range starts with a code point less than 256 and ends with one greater
than 255, it is split into two ranges, with characters less than 256 being
indicated in the bit map, and the rest with XCL_RANGE.
When XCL_NOT is set, the bit map, if present, contains bits for characters that
are allowed (exactly as for OP_NCLASS), but the list of items that follow it
specifies characters and properties that are not allowed.
The meaning of the bitmap indicated by XCL_MAP is that, if one is present, then
it fully describes which code points < 256 match the class (without needing to
invert the check according to XCL_NOT); the other items in the OP_XCLASS need
not be consulted. However, if a bitmap is not present, then code points < 256
may still match, so the other items in the OP_XCLASS must be consulted.
For classes containing logical expressions, such as "[\p{Greek} && \p{Lu}]" for
"uppercase Greek letters", OP_ECLASS is used. The expression is encoded as a
stack-based series of operands and operators, in Reverse Polish Notation. Like
an OP_XCLASS, the OP_ECLASS is first followed by a LINK_SIZE value containing
the total length of the opcode and its data. That is followed by a code unit
containing flags: currently just ECL_MAP indicating that a bit map is present.
There follows the bit map, if ECL_MAP is set. Finally, there is a sequence of
items that are either an operand or operator. Each item starts with a single
code unit containing its type:
ECL_AND AND; no additional data
ECL_OR OR; no additional data
ECL_XOR XOR; no additional data
ECL_NOT NOT; no additional data
ECL_XCLASS The additional data which follows ECL_XCLASS is the same as for
an OP_XCLASS, except that this data is preceded by ECL_XCLASS
rather than OP_XCLASS.
Because the OP_ECLASS has its own bitmap (if required), an
ECL_XCLASS should not contain a bitmap.
Additionally, there are two intermediate values used during compilation, but
these are folded away during generation of the opcode, and so never appear
inside an OP_ECLASS at match time. They are:
ECL_ANY match all characters; no additional data
ECL_NONE match no characters; no additional data
The meaning of the bitmap indicated by ECL_MAP is the same as XCL_MAP.
If the bitmap is present, all codepoints < 256 are checked against the bitmap.
Back references
---------------
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
reference number when the reference is to a unique capturing group (either by
number or by name). When named groups are used, there may be more than one
group with the same name. In this case, a reference to such a group by name
generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index
(not the byte offset) in the group name table of the first entry for the
required name, followed by the number of groups with the same name. The
matching code can then search for the first one that is set.
OP_REFI and OP_DNREFI are further followed by an item containing any
case-insensitivity flags.
Repeating character classes and back references
-----------------------------------------------
Single-character classes are handled specially (see above). This section
applies to other classes and also to back references. In both cases, the repeat
information follows the base item. The matching code looks at the following
opcode to see if it is one of these:
OP_CRSTAR
OP_CRMINSTAR
OP_CRPOSSTAR
OP_CRPLUS
OP_CRMINPLUS
OP_CRPOSPLUS
OP_CRQUERY
OP_CRMINQUERY
OP_CRPOSQUERY
OP_CRRANGE
OP_CRMINRANGE
OP_CRPOSRANGE
All but the last three are single-code-unit items, with no data. The range
opcodes are followed by the minimum and maximum repeat counts.
Brackets and alternation
------------------------
A pair of non-capturing round brackets is wrapped round each expression at
compile time, so alternation always happens in the context of brackets.
[Note for North Americans: "bracket" to some English speakers, including
myself, can be round, square, curly, or pointy. Hence this usage rather than
"parentheses".]
Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A
bracket opcode is followed by a LINK_SIZE value which gives the offset to the
next alternative OP_ALT or, if there aren't any branches, to the terminating
opcode. Each OP_ALT is followed by a LINK_SIZE value giving the offset to the
next one, or to the final opcode. For capturing brackets, the bracket number is
a count that immediately follows the offset.
There are several opcodes that mark the end of a subpattern group. OP_KET is
used for subpatterns that do not repeat indefinitely, OP_KETRMIN and
OP_KETRMAX are used for indefinite repetitions, minimally or maximally
respectively, and OP_KETRPOS for possessive repetitions (see below for more
details). All four are followed by a LINK_SIZE value giving (as a positive
number) the offset back to the matching opening bracket opcode.
If a subpattern is quantified such that it is permitted to match zero times, it
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
single-unit opcodes that tell the matcher that skipping the following
subpattern entirely is a valid match. In the case of the first two, not
skipping the pattern is also valid (greedy and non-greedy). The third is used
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
because it may be called as a subroutine from elsewhere in the pattern.
A subpattern with an indefinite maximum repetition is replicated in the
compiled data its minimum number of times (or once with OP_BRAZERO if the
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
as appropriate.
A subpattern with a bounded maximum repetition is replicated in a nested
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
before each replication after the minimum, so that, for example, (abc){2,5} is
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
has the same number.
When a repeated subpattern has an unbounded upper limit, it is checked to see
whether it could match an empty string. If this is the case, the opcode in the
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
that it needs to check for matching an empty string when it hits OP_KETRMIN or
OP_KETRMAX, and if so, to break the loop.
Possessive brackets
-------------------
When a repeated group (capturing or non-capturing) is marked as possessive by
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
repetition is zero, the group is preceded by OP_BRAPOSZERO.
Once-only (atomic) groups
-------------------------
These are just like other subpatterns, but they start with the opcode OP_ONCE.
The check for matching an empty string in an unbounded repeat is handled
entirely at runtime, so there is just this one opcode for atomic groups.
Assertions
----------
Forward assertions are also just like other subpatterns, but starting with one
of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or
OP_ASSERT_NOT.
Backward assertions use the opcodes OP_ASSERTBACK, OP_ASSERTBACK_NA, and
OP_ASSERTBACK_NOT. If all the branches of a backward assertion are of fixed
length (not necessarily the same), the first opcode inside each branch is
OP_REVERSE, followed by an IMM2_SIZE count of the number of characters to move
back the pointer in the subject string, thus allowing each branch to have a
different (but fixed) length.
Variable-length backward assertions whose maximum matching length is limited
are also supported. For such assertions, the first opcode inside each branch is
OP_VREVERSE, followed by the minimum and maximum lengths for that branch,
unless these happen to be equal, in which case OP_REVERSE is used. These
IMM2_SIZE values occupy two code units each in 8-bit mode, and 1 code unit in
16/32 bit modes.
In ASCII or UTF-32 mode, the character counts in OP_REVERSE and OP_VREVERSE are
also the number of code units, but in UTF-8/16 mode each character may occupy
more than one code unit.
The "scan substring" assertion compiles as OP_ASSERT_SCS. This opcode is
followed by a list of arguments. Each argument is either an OP_CREF or
OP_DNCREF byte code sequence. The details of these sequences are described
in the next section.
For example (*scs:(1,'NAME')...PATTERN...) is translated to:
[OP_ASSERT_SCS] [OP_CREF] [OP_CREF] ...PATTERN... [OP_KET]
If 'NAME' is a duplicated name, the second [OP_CREF] is [OP_DNCREF] instead.
Conditional subpatterns
-----------------------
These are like other subpatterns, but they start with the opcode OP_COND, or
OP_SCOND for one that might match an empty string in an unbounded repeat.
If the condition is a back reference, this is stored at the start of the
subpattern using the opcode OP_CREF followed by a count containing the
reference number, provided that the reference is to a unique capturing group.
If the reference was by name and there is more than one group with that name,
OP_DNCREF is used instead. It is followed by two counts: the index in the group
names table, and the number of groups with the same name. This allows the
matcher to check if any group with the given name is set.
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the
whole pattern") or OP_DNRREF (with data as for OP_DNCREF).
For a DEFINE condition, OP_FALSE is used (with no associated data). During
compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when
the conditional group is complete, there can be a check to ensure that it
contains only one top-level branch. Once this has happened, the opcode is
changed to OP_FALSE, so the matcher never sees OP_DEFINE.
There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which
tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE
or OP_FALSE.
If a condition is not a back reference, recursion test, DEFINE, or VERSION, it
must start with a parenthesized atomic assertion, whose opcode normally
immediately follows OP_COND or OP_SCOND. However, if automatic callouts are
enabled, a callout is inserted immediately before the assertion. It is also
possible to insert a manual callout at this point. Only assertion conditions
may have callouts preceding the condition.
A condition that is the negative assertion (?!) is optimized to OP_FAIL in all
parts of the pattern, so this is another opcode that may appear as a condition.
It is treated the same as OP_FALSE.
Recursion
---------
Recursion either matches the current pattern, or some subexpression. The opcode
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
bracket from the start of the whole pattern. OP_RECURSE is also used for
"subroutine" calls, even though they are not strictly a recursion. Up till
release 10.30 recursions were treated as atomic groups, making them
incompatible with Perl (but PCRE had them well before Perl did). From 10.30,
backtracking into recursions is supported.
Repeated recursions used to be wrapped inside OP_ONCE brackets, which not only
forced no backtracking, but also allowed repetition to be handled as for other
bracketed groups. From 10.30 onwards, repeated recursions are duplicated for
their minimum repetitions, and then wrapped in non-capturing brackets for the
remainder. For example, (?1){3} is treated as (?1)(?1)(?1), and (?1){2,4} is
treated as (?1)(?1)(?:(?1)){0,2}.
Callouts
--------
A callout may have either a numerical argument or a string argument. These use
OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are followed by
two LINK_SIZE values: the first gives the offset in the pattern string to the
start of the following item, and the second gives the length of that item.
These values make it possible for pcre2test to output useful tracing
information using callouts.
In the case of a numeric callout, after these two values there is a single code
unit containing the callout number, in the range 0-255, with 255 being used for
callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT
option. Thus, this opcode item is of fixed length:
[OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER]
For callouts with string arguments, OP_CALLOUT_STR has three more data items:
a LINK_SIZE value giving the complete length of the entire opcode item, a
LINK_SIZE item containing the offset within the pattern string to the start of
the string argument, and the string itself, preceded by its starting delimiter
and followed by a binary zero. When a callout function is called, a pointer to
the actual string is passed, but the delimiter can be accessed as string[-1] if
the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is
compiled as the following bytes (decimal numbers represent binary values):
[OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0]
-------- ------- -------- -------
| | | |
------- LINK_SIZE items ------
Opcode table checking
---------------------
The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is
not a real opcode, but is used to check at compile time that tables indexed by
opcode are the correct length, in order to catch updating errors.
See also
--------
The file maint/README contains additional information.
Philip Hazel
August 2024

View File

@@ -0,0 +1,368 @@
Installation Instructions
*************************
Copyright (C) 1994-1996, 1999-2002, 2004-2017, 2020-2021 Free
Software Foundation, Inc.
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. This file is offered as-is,
without warranty of any kind.
Basic Installation
==================
Briefly, the shell command './configure && make && make install'
should configure, build, and install this package. The following
more-detailed instructions are generic; see the 'README' file for
instructions specific to this package. Some packages provide this
'INSTALL' file but do not implement all of the features documented
below. The lack of an optional feature in a given package is not
necessarily a bug. More recommendations for GNU packages can be found
in *note Makefile Conventions: (standards)Makefile Conventions.
The 'configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
those values to create a 'Makefile' in each directory of the package.
It may also create one or more '.h' files containing system-dependent
definitions. Finally, it creates a shell script 'config.status' that
you can run in the future to recreate the current configuration, and a
file 'config.log' containing compiler output (useful mainly for
debugging 'configure').
It can also use an optional file (typically called 'config.cache' and
enabled with '--cache-file=config.cache' or simply '-C') that saves the
results of its tests to speed up reconfiguring. Caching is disabled by
default to prevent problems with accidental use of stale cache files.
If you need to do unusual things to compile the package, please try
to figure out how 'configure' could check whether to do them, and mail
diffs or instructions to the address given in the 'README' so they can
be considered for the next release. If you are using the cache, and at
some point 'config.cache' contains results you don't want to keep, you
may remove or edit it.
The file 'configure.ac' (or 'configure.in') is used to create
'configure' by a program called 'autoconf'. You need 'configure.ac' if
you want to change it or regenerate 'configure' using a newer version of
'autoconf'.
The simplest way to compile this package is:
1. 'cd' to the directory containing the package's source code and type
'./configure' to configure the package for your system.
Running 'configure' might take a while. While running, it prints
some messages telling which features it is checking for.
2. Type 'make' to compile the package.
3. Optionally, type 'make check' to run any self-tests that come with
the package, generally using the just-built uninstalled binaries.
4. Type 'make install' to install the programs and any data files and
documentation. When installing into a prefix owned by root, it is
recommended that the package be configured and built as a regular
user, and only the 'make install' phase executed with root
privileges.
5. Optionally, type 'make installcheck' to repeat any self-tests, but
this time using the binaries in their final installed location.
This target does not install anything. Running this target as a
regular user, particularly if the prior 'make install' required
root privileges, verifies that the installation completed
correctly.
6. You can remove the program binaries and object files from the
source code directory by typing 'make clean'. To also remove the
files that 'configure' created (so you can compile the package for
a different kind of computer), type 'make distclean'. There is
also a 'make maintainer-clean' target, but that is intended mainly
for the package's developers. If you use it, you may have to get
all sorts of other programs in order to regenerate files that came
with the distribution.
7. Often, you can also type 'make uninstall' to remove the installed
files again. In practice, not all packages have tested that
uninstallation works correctly, even though it is required by the
GNU Coding Standards.
8. Some packages, particularly those that use Automake, provide 'make
distcheck', which can be used by developers to test that all other
targets like 'make install' and 'make uninstall' work correctly.
This target is generally not run by end users.
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that
the 'configure' script does not know about. Run './configure --help'
for details on some of the pertinent environment variables.
You can give 'configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here is
an example:
./configure CC=c99 CFLAGS=-g LIBS=-lposix
*Note Defining Variables::, for more details.
Compiling For Multiple Architectures
====================================
You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you can use GNU 'make'. 'cd' to the
directory where you want the object files and executables to go and run
the 'configure' script. 'configure' automatically checks for the source
code in the directory that 'configure' is in and in '..'. This is known
as a "VPATH" build.
With a non-GNU 'make', it is safer to compile the package for one
architecture at a time in the source code directory. After you have
installed the package for one architecture, use 'make distclean' before
reconfiguring for another architecture.
On MacOS X 10.5 and later systems, you can create libraries and
executables that work on multiple system types--known as "fat" or
"universal" binaries--by specifying multiple '-arch' options to the
compiler but only a single '-arch' option to the preprocessor. Like
this:
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CPP="gcc -E" CXXCPP="g++ -E"
This is not guaranteed to produce working output in all cases; you
may have to build one architecture at a time and combine the results
using the 'lipo' tool if you have problems.
Installation Names
==================
By default, 'make install' installs the package's commands under
'/usr/local/bin', include files under '/usr/local/include', etc. You
can specify an installation prefix other than '/usr/local' by giving
'configure' the option '--prefix=PREFIX', where PREFIX must be an
absolute file name.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
pass the option '--exec-prefix=PREFIX' to 'configure', the package uses
PREFIX as the prefix for installing programs and libraries.
Documentation and other data files still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like '--bindir=DIR' to specify different values for particular
kinds of files. Run 'configure --help' for a list of the directories
you can set and what kinds of files go in them. In general, the default
for these options is expressed in terms of '${prefix}', so that
specifying just '--prefix' will affect all of the other directory
specifications that were not explicitly provided.
The most portable way to affect installation locations is to pass the
correct locations to 'configure'; however, many packages provide one or
both of the following shortcuts of passing variable assignments to the
'make install' command line to change installation locations without
having to reconfigure or recompile.
The first method involves providing an override variable for each
affected directory. For example, 'make install
prefix=/alternate/directory' will choose an alternate location for all
directory configuration variables that were expressed in terms of
'${prefix}'. Any directories that were specified during 'configure',
but not in terms of '${prefix}', must each be overridden at install time
for the entire installation to be relocated. The approach of makefile
variable overrides for each directory variable is required by the GNU
Coding Standards, and ideally causes no recompilation. However, some
platforms have known limitations with the semantics of shared libraries
that end up requiring recompilation when using this method, particularly
noticeable in packages that use GNU Libtool.
The second method involves providing the 'DESTDIR' variable. For
example, 'make install DESTDIR=/alternate/directory' will prepend
'/alternate/directory' before all installation names. The approach of
'DESTDIR' overrides is not required by the GNU Coding Standards, and
does not work on platforms that have drive letters. On the other hand,
it does better at avoiding recompilation issues, and works well even
when some directory options were not specified in terms of '${prefix}'
at 'configure' time.
Optional Features
=================
If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving 'configure' the
option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'.
Some packages pay attention to '--enable-FEATURE' options to
'configure', where FEATURE indicates an optional part of the package.
They may also pay attention to '--with-PACKAGE' options, where PACKAGE
is something like 'gnu-as' or 'x' (for the X Window System). The
'README' should mention any '--enable-' and '--with-' options that the
package recognizes.
For packages that use the X Window System, 'configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the 'configure' options '--x-includes=DIR' and
'--x-libraries=DIR' to specify their locations.
Some packages offer the ability to configure how verbose the
execution of 'make' will be. For these packages, running './configure
--enable-silent-rules' sets the default to minimal output, which can be
overridden with 'make V=1'; while running './configure
--disable-silent-rules' sets the default to verbose, which can be
overridden with 'make V=0'.
Particular systems
==================
On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC
is not installed, it is recommended to use the following options in
order to use an ANSI C compiler:
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
HP-UX 'make' updates targets which have the same timestamps as their
prerequisites, which makes it generally unusable when shipped generated
files such as 'configure' are involved. Use GNU 'make' instead.
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
parse its '<wchar.h>' header file. The option '-nodtk' can be used as a
workaround. If GNU CC is not installed, it is therefore recommended to
try
./configure CC="cc"
and if that doesn't work, try
./configure CC="cc -nodtk"
On Solaris, don't put '/usr/ucb' early in your 'PATH'. This
directory contains several dysfunctional programs; working variants of
these programs are available in '/usr/bin'. So, if you need '/usr/ucb'
in your 'PATH', put it _after_ '/usr/bin'.
On Haiku, software installed for all users goes in '/boot/common',
not '/usr/local'. It is recommended to use the following options:
./configure --prefix=/boot/common
Specifying the System Type
==========================
There may be some features 'configure' cannot figure out
automatically, but needs to determine by the type of machine the package
will run on. Usually, assuming the package is built to be run on the
_same_ architectures, 'configure' can figure that out, but if it prints
a message saying it cannot guess the machine type, give it the
'--build=TYPE' option. TYPE can either be a short name for the system
type, such as 'sun4', or a canonical name which has the form:
CPU-COMPANY-SYSTEM
where SYSTEM can have one of these forms:
OS
KERNEL-OS
See the file 'config.sub' for the possible values of each field. If
'config.sub' isn't included in this package, then this package doesn't
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
use the option '--target=TYPE' to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with '--host=TYPE'.
Sharing Defaults
================
If you want to set default values for 'configure' scripts to share,
you can create a site shell script called 'config.site' that gives
default values for variables like 'CC', 'cache_file', and 'prefix'.
'configure' looks for 'PREFIX/share/config.site' if it exists, then
'PREFIX/etc/config.site' if it exists. Or, you can set the
'CONFIG_SITE' environment variable to the location of the site script.
A warning: not all 'configure' scripts look for a site script.
Defining Variables
==================
Variables not defined in a site shell script can be set in the
environment passed to 'configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
them in the 'configure' command line, using 'VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
causes the specified 'gcc' to be used as the C compiler (unless it is
overridden in the site shell script).
Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an
Autoconf limitation. Until the limitation is lifted, you can use this
workaround:
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
'configure' Invocation
======================
'configure' recognizes the following options to control how it
operates.
'--help'
'-h'
Print a summary of all of the options to 'configure', and exit.
'--help=short'
'--help=recursive'
Print a summary of the options unique to this package's
'configure', and exit. The 'short' variant lists options used only
in the top level, while the 'recursive' variant lists options also
present in any nested packages.
'--version'
'-V'
Print the version of Autoconf used to generate the 'configure'
script, and exit.
'--cache-file=FILE'
Enable the cache: use and save the results of the tests in FILE,
traditionally 'config.cache'. FILE defaults to '/dev/null' to
disable caching.
'--config-cache'
'-C'
Alias for '--cache-file=config.cache'.
'--quiet'
'--silent'
'-q'
Do not print messages saying which checks are being made. To
suppress all normal output, redirect it to '/dev/null' (any error
messages will still be shown).
'--srcdir=DIR'
Look for the package's source code in directory DIR. Usually
'configure' can determine that directory automatically.
'--prefix=DIR'
Use DIR as the installation prefix. *note Installation Names:: for
more details, including other options available for fine-tuning the
installation locations.
'--no-create'
'-n'
Run the configure checks, but stop before creating any output
files.
'configure' also accepts some other, not widely useful, options. Run
'configure --help' for more details.

View File

@@ -0,0 +1,103 @@
PCRE2 License
=============
| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception |
|---------|-------|
PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
licence, as specified below, with one exemption for certain binary
redistributions. The documentation for PCRE2, supplied in the "doc" directory,
is distributed under the same terms as the software itself. The data in the
testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a just-in-time compiler that can be used to
optimize pattern matching. This is an optional feature that can be omitted when
the library is built.
COPYRIGHT
---------
### The basic library functions
Written by: Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
Retired from University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2007 University of Cambridge
Copyright (c) 2007-2024 Philip Hazel
All rights reserved.
### PCRE2 Just-In-Time compilation support
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright (c) 2010-2024 Zoltan Herczeg
All rights reserved.
### Stack-less Just-In-Time compiler
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright (c) 2009-2024 Zoltan Herczeg
All rights reserved.
### All other contributions
Many other contributors have participated in the authorship of PCRE2. As PCRE2
has never required a Contributor Licensing Agreement, or other copyright
assignment agreement, all contributions have copyright retained by each
original contributor or their employer.
THE "BSD" LICENCE
-----------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notices,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notices, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of any
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES
------------------------------------------
The second condition in the BSD licence (covering binary redistributions) does
not apply all the way down a chain of software. If binary package A includes
PCRE2, it must respect the condition, but if package B is software that
includes package A, the condition is not imposed on package B unless it uses
PCRE2 independently.
End

View File

@@ -0,0 +1,9 @@
# Bazel module declaration (Bzlmod): names this module "pcre2" and records its
# version and compatibility level.
module(
name = "pcre2",
version = "10.45",
compatibility_level = 1,
)
# Direct dependencies on other Bazel modules, each with the version requested.
bazel_dep(name = "rules_cc", version = "0.0.1")
bazel_dep(name = "bazel_skylib", version = "1.2.1")
bazel_dep(name = "platforms", version = "0.0.4")

View File

@@ -0,0 +1,978 @@
## Process this file with automake to produce Makefile.in.
AUTOMAKE_OPTIONS = subdir-objects
ACLOCAL_AMFLAGS = -I m4
## This seems to have become necessary for building in non-source directory.
AM_CPPFLAGS="-I$(srcdir)/src"
## Specify the documentation files that are distributed.
dist_doc_DATA = \
AUTHORS.md \
COPYING \
ChangeLog \
LICENCE.md \
NEWS \
README \
SECURITY.md \
doc/pcre2.txt \
doc/pcre2-config.txt \
doc/pcre2grep.txt \
doc/pcre2test.txt
dist_html_DATA = \
doc/html/NON-AUTOTOOLS-BUILD.txt \
doc/html/README.txt \
doc/html/index.html \
doc/html/pcre2-config.html \
doc/html/pcre2.html \
doc/html/pcre2_callout_enumerate.html \
doc/html/pcre2_code_copy.html \
doc/html/pcre2_code_copy_with_tables.html \
doc/html/pcre2_code_free.html \
doc/html/pcre2_compile.html \
doc/html/pcre2_compile_context_copy.html \
doc/html/pcre2_compile_context_create.html \
doc/html/pcre2_compile_context_free.html \
doc/html/pcre2_config.html \
doc/html/pcre2_convert_context_copy.html \
doc/html/pcre2_convert_context_create.html \
doc/html/pcre2_convert_context_free.html \
doc/html/pcre2_converted_pattern_free.html \
doc/html/pcre2_dfa_match.html \
doc/html/pcre2_general_context_copy.html \
doc/html/pcre2_general_context_create.html \
doc/html/pcre2_general_context_free.html \
doc/html/pcre2_get_error_message.html \
doc/html/pcre2_get_mark.html \
doc/html/pcre2_get_match_data_heapframes_size.html \
doc/html/pcre2_get_match_data_size.html \
doc/html/pcre2_get_ovector_count.html \
doc/html/pcre2_get_ovector_pointer.html \
doc/html/pcre2_get_startchar.html \
doc/html/pcre2_jit_compile.html \
doc/html/pcre2_jit_free_unused_memory.html \
doc/html/pcre2_jit_match.html \
doc/html/pcre2_jit_stack_assign.html \
doc/html/pcre2_jit_stack_create.html \
doc/html/pcre2_jit_stack_free.html \
doc/html/pcre2_maketables.html \
doc/html/pcre2_maketables_free.html \
doc/html/pcre2_match.html \
doc/html/pcre2_match_context_copy.html \
doc/html/pcre2_match_context_create.html \
doc/html/pcre2_match_context_free.html \
doc/html/pcre2_match_data_create.html \
doc/html/pcre2_match_data_create_from_pattern.html \
doc/html/pcre2_match_data_free.html \
doc/html/pcre2_pattern_convert.html \
doc/html/pcre2_pattern_info.html \
doc/html/pcre2_serialize_decode.html \
doc/html/pcre2_serialize_encode.html \
doc/html/pcre2_serialize_free.html \
doc/html/pcre2_serialize_get_number_of_codes.html \
doc/html/pcre2_set_bsr.html \
doc/html/pcre2_set_callout.html \
doc/html/pcre2_set_character_tables.html \
doc/html/pcre2_set_compile_extra_options.html \
doc/html/pcre2_set_compile_recursion_guard.html \
doc/html/pcre2_set_depth_limit.html \
doc/html/pcre2_set_glob_escape.html \
doc/html/pcre2_set_glob_separator.html \
doc/html/pcre2_set_heap_limit.html \
doc/html/pcre2_set_match_limit.html \
doc/html/pcre2_set_max_pattern_compiled_length.html \
doc/html/pcre2_set_max_pattern_length.html \
doc/html/pcre2_set_max_varlookbehind.html \
doc/html/pcre2_set_offset_limit.html \
doc/html/pcre2_set_optimize.html \
doc/html/pcre2_set_newline.html \
doc/html/pcre2_set_parens_nest_limit.html \
doc/html/pcre2_set_recursion_limit.html \
doc/html/pcre2_set_recursion_memory_management.html \
doc/html/pcre2_set_substitute_callout.html \
doc/html/pcre2_set_substitute_case_callout.html \
doc/html/pcre2_substitute.html \
doc/html/pcre2_substring_copy_byname.html \
doc/html/pcre2_substring_copy_bynumber.html \
doc/html/pcre2_substring_free.html \
doc/html/pcre2_substring_get_byname.html \
doc/html/pcre2_substring_get_bynumber.html \
doc/html/pcre2_substring_length_byname.html \
doc/html/pcre2_substring_length_bynumber.html \
doc/html/pcre2_substring_list_free.html \
doc/html/pcre2_substring_list_get.html \
doc/html/pcre2_substring_nametable_scan.html \
doc/html/pcre2_substring_number_from_name.html \
doc/html/pcre2api.html \
doc/html/pcre2build.html \
doc/html/pcre2callout.html \
doc/html/pcre2compat.html \
doc/html/pcre2convert.html \
doc/html/pcre2demo.html \
doc/html/pcre2grep.html \
doc/html/pcre2jit.html \
doc/html/pcre2limits.html \
doc/html/pcre2matching.html \
doc/html/pcre2partial.html \
doc/html/pcre2pattern.html \
doc/html/pcre2perform.html \
doc/html/pcre2posix.html \
doc/html/pcre2sample.html \
doc/html/pcre2serialize.html \
doc/html/pcre2syntax.html \
doc/html/pcre2test.html \
doc/html/pcre2unicode.html
dist_man_MANS = \
doc/pcre2-config.1 \
doc/pcre2.3 \
doc/pcre2_callout_enumerate.3 \
doc/pcre2_code_copy.3 \
doc/pcre2_code_copy_with_tables.3 \
doc/pcre2_code_free.3 \
doc/pcre2_compile.3 \
doc/pcre2_compile_context_copy.3 \
doc/pcre2_compile_context_create.3 \
doc/pcre2_compile_context_free.3 \
doc/pcre2_config.3 \
doc/pcre2_convert_context_copy.3 \
doc/pcre2_convert_context_create.3 \
doc/pcre2_convert_context_free.3 \
doc/pcre2_converted_pattern_free.3 \
doc/pcre2_dfa_match.3 \
doc/pcre2_general_context_copy.3 \
doc/pcre2_general_context_create.3 \
doc/pcre2_general_context_free.3 \
doc/pcre2_get_error_message.3 \
doc/pcre2_get_mark.3 \
doc/pcre2_get_match_data_heapframes_size.3 \
doc/pcre2_get_match_data_size.3 \
doc/pcre2_get_ovector_count.3 \
doc/pcre2_get_ovector_pointer.3 \
doc/pcre2_get_startchar.3 \
doc/pcre2_jit_compile.3 \
doc/pcre2_jit_free_unused_memory.3 \
doc/pcre2_jit_match.3 \
doc/pcre2_jit_stack_assign.3 \
doc/pcre2_jit_stack_create.3 \
doc/pcre2_jit_stack_free.3 \
doc/pcre2_maketables.3 \
doc/pcre2_maketables_free.3 \
doc/pcre2_match.3 \
doc/pcre2_match_context_copy.3 \
doc/pcre2_match_context_create.3 \
doc/pcre2_match_context_free.3 \
doc/pcre2_match_data_create.3 \
doc/pcre2_match_data_create_from_pattern.3 \
doc/pcre2_match_data_free.3 \
doc/pcre2_pattern_convert.3 \
doc/pcre2_pattern_info.3 \
doc/pcre2_serialize_decode.3 \
doc/pcre2_serialize_encode.3 \
doc/pcre2_serialize_free.3 \
doc/pcre2_serialize_get_number_of_codes.3 \
doc/pcre2_set_bsr.3 \
doc/pcre2_set_callout.3 \
doc/pcre2_set_character_tables.3 \
doc/pcre2_set_compile_extra_options.3 \
doc/pcre2_set_compile_recursion_guard.3 \
doc/pcre2_set_depth_limit.3 \
doc/pcre2_set_glob_escape.3 \
doc/pcre2_set_glob_separator.3 \
doc/pcre2_set_heap_limit.3 \
doc/pcre2_set_match_limit.3 \
doc/pcre2_set_max_pattern_compiled_length.3 \
doc/pcre2_set_max_pattern_length.3 \
doc/pcre2_set_max_varlookbehind.3 \
doc/pcre2_set_offset_limit.3 \
doc/pcre2_set_optimize.3 \
doc/pcre2_set_newline.3 \
doc/pcre2_set_parens_nest_limit.3 \
doc/pcre2_set_recursion_limit.3 \
doc/pcre2_set_recursion_memory_management.3 \
doc/pcre2_set_substitute_callout.3 \
doc/pcre2_set_substitute_case_callout.3 \
doc/pcre2_substitute.3 \
doc/pcre2_substring_copy_byname.3 \
doc/pcre2_substring_copy_bynumber.3 \
doc/pcre2_substring_free.3 \
doc/pcre2_substring_get_byname.3 \
doc/pcre2_substring_get_bynumber.3 \
doc/pcre2_substring_length_byname.3 \
doc/pcre2_substring_length_bynumber.3 \
doc/pcre2_substring_list_free.3 \
doc/pcre2_substring_list_get.3 \
doc/pcre2_substring_nametable_scan.3 \
doc/pcre2_substring_number_from_name.3 \
doc/pcre2api.3 \
doc/pcre2build.3 \
doc/pcre2callout.3 \
doc/pcre2compat.3 \
doc/pcre2convert.3 \
doc/pcre2demo.3 \
doc/pcre2grep.1 \
doc/pcre2jit.3 \
doc/pcre2limits.3 \
doc/pcre2matching.3 \
doc/pcre2partial.3 \
doc/pcre2pattern.3 \
doc/pcre2perform.3 \
doc/pcre2posix.3 \
doc/pcre2sample.3 \
doc/pcre2serialize.3 \
doc/pcre2syntax.3 \
doc/pcre2test.1 \
doc/pcre2unicode.3
# The Libtool libraries to install. We'll add to this later.
lib_LTLIBRARIES =
# Unit tests you want to run when people type 'make check'.
# TESTS is for binary unit tests, check_SCRIPTS for script-based tests
TESTS =
check_SCRIPTS =
dist_noinst_SCRIPTS =
# Some of the binaries we make are to be installed, and others are
# (non-user-visible) helper programs needed to build the libraries.
bin_PROGRAMS =
noinst_PROGRAMS =
# Additional files to delete on 'make clean', 'make distclean',
# and 'make maintainer-clean'. It turns out that the default is to delete only
# those binaries that *this* configuration has created. If the configuration
# has been changed, some binaries may not get automatically deleted. Therefore
# we list them here.
CLEANFILES = \
pcre2_dftables \
pcre2_jit_test \
pcre2fuzzcheck-8 \
pcre2fuzzcheck-16 \
pcre2fuzzcheck-32 \
pcre2demo
DISTCLEANFILES = src/config.h.in~
MAINTAINERCLEANFILES =
# Additional files to bundle with the distribution, over and above what
# the Autotools include by default.
EXTRA_DIST =
# These files contain additional m4 macros that are used by autoconf.
EXTRA_DIST += \
m4/ax_pthread.m4 m4/pcre2_visibility.m4
# These files contain maintenance information
EXTRA_DIST += \
NON-AUTOTOOLS-BUILD \
HACKING
# These are support files for building with Bazel or Zig
EXTRA_DIST += \
BUILD.bazel \
MODULE.bazel \
WORKSPACE.bazel \
build.zig
# These are support files for building under VMS
EXTRA_DIST += \
vms/configure.com \
vms/openvms_readme.txt \
vms/pcre2.h_patch \
vms/stdint.h
# These files are usable versions of pcre2.h and config.h that are distributed
# for the benefit of people who are building PCRE2 manually, without the
# Autotools support.
EXTRA_DIST += \
src/pcre2.h.generic \
src/config.h.generic
# The only difference between pcre2.h.in and pcre2.h is the setting of the PCRE
# version number. Therefore, we can create the generic version just by copying.
# Maintainer rule: refresh the distributed generic header. The recipe deletes
# any old copy, then copies src/pcre2.h (relative to the current directory)
# over the target, preserving mode and timestamps (cp -p).
# NOTE(review): the listed prerequisites are src/pcre2.h.in and configure.ac,
# but the file actually copied is the configured src/pcre2.h, which is not a
# prerequisite here - presumably it is expected to exist already; confirm.
src/pcre2.h.generic: src/pcre2.h.in configure.ac
rm -f $@
cp -p src/pcre2.h $@
# It is more complicated for config.h.generic. We need the version that results
# from a default configuration so as to get all the default values for PCRE
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
# doing a configure in a temporary directory. However, some trickery is needed,
# because the source directory may already be configured. If you just try
# running configure in a new directory, it complains. For this reason, we move
# config.status out of the way while doing the default configuration. The
# resulting config.h is munged by perl to put #ifdefs round any #defines for
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings.
# Recipe outline (each doubled dollar sign below is make's escape for a single
# dollar sign seen by the shell or by perl):
#   1. Delete the old target and any leftover _generic scratch directory,
#      then create a fresh _generic.
#   2. If the source directory contains config.status, rename it to
#      config.status.aside.
#   3. Run configure from inside _generic with no arguments; "|| :" discards
#      its exit status so that the following steps always run.
#   4. Rename config.status.aside back to config.status, if it exists.
#   5. "test -f" makes the rule fail if no _generic/src/config.h was produced.
#   6. Filter that config.h through perl into the target:
#        - a line containing "__attribute__ ((visibility" is reduced to the
#          text that precedes the attribute;
#        - a note about libtool is printed before a line mentioning LT_OBJDIR;
#        - "#define HAVE_xxx", "#define SUPPORT_xxx" and "#define STDC_xxx"
#          become a commented-out "#undef";
#        - any other "#define NAME" whose name does not begin with PACKAGE or
#          VERSION is wrapped in "#ifndef NAME" / "#endif";
#        - consecutive blank lines are squeezed to one.
#   7. Remove the _generic scratch directory.
src/config.h.generic: configure.ac
rm -rf $@ _generic
mkdir _generic
cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
cd _generic && $(abs_top_srcdir)/configure || :
cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
test -f _generic/src/config.h
perl -n \
-e 'BEGIN{$$blank=0;}' \
-e 'if(/(.+?)\s*__attribute__ \(\(visibility/){print"$$1\n";$$blank=0;next;}' \
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
_generic/src/config.h >$@
rm -rf _generic
MAINTAINERCLEANFILES += src/pcre2.h.generic src/config.h.generic
# These are the header files we'll install. We do not distribute pcre2.h
# because it is generated from pcre2.h.in.
nodist_include_HEADERS = src/pcre2.h
include_HEADERS = src/pcre2posix.h
# This is the "config" script.
bin_SCRIPTS = pcre2-config
## ---------------------------------------------------------------
## The pcre2_dftables program is used to rebuild character tables before
## compiling PCRE2, if --enable-rebuild-chartables is specified. It is not an
## installed program. The default (when --enable-rebuild-chartables is not
## specified) is to copy a distributed set of tables that are defined for ASCII
## code. In this case, pcre2_dftables is not needed.
## Two mutually exclusive ways of producing src/pcre2_chartables.c.
if WITH_REBUILD_CHARTABLES
## Build the uninstalled pcre2_dftables helper and run it with the target file
## name as its only argument.
noinst_PROGRAMS += pcre2_dftables
pcre2_dftables_SOURCES = src/pcre2_dftables.c
src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
rm -f $@
./pcre2_dftables$(EXEEXT) $@
else
# Default: replace the target with a link (made with LN_S) to the distributed
# tables, using absolute source and build directory paths.
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
endif # WITH_REBUILD_CHARTABLES
BUILT_SOURCES = src/pcre2_chartables.c
NODIST_SOURCES = src/pcre2_chartables.c
## Define the list of common sources, then arrange to build whichever of the
## 8-, 16-, or 32-bit libraries are configured.
COMMON_SOURCES = \
src/pcre2_auto_possess.c \
src/pcre2_chkdint.c \
src/pcre2_compile.c \
src/pcre2_compile.h \
src/pcre2_compile_class.c \
src/pcre2_config.c \
src/pcre2_context.c \
src/pcre2_convert.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
src/pcre2_extuni.c \
src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_char_inc.h \
src/pcre2_jit_compile.c \
src/pcre2_jit_neon_inc.h \
src/pcre2_jit_simd_inc.h \
src/pcre2_maketables.c \
src/pcre2_match.c \
src/pcre2_match_data.c \
src/pcre2_newline.c \
src/pcre2_ord2utf.c \
src/pcre2_pattern_info.c \
src/pcre2_script_run.c \
src/pcre2_serialize.c \
src/pcre2_string_utils.c \
src/pcre2_study.c \
src/pcre2_substitute.c \
src/pcre2_substring.c \
src/pcre2_tables.c \
src/pcre2_ucd.c \
src/pcre2_ucp.h \
src/pcre2_util.h \
src/pcre2_valid_utf.c \
src/pcre2_xclass.c
# The pcre2_ucptables.c file is #included by pcre2_tables.c
EXTRA_DIST += src/pcre2_ucptables.c
## One conditional per code unit width. Each enabled width adds an installed
## libtool library built from the same COMMON_SOURCES plus the non-distributed
## NODIST_SOURCES; the three definitions differ only in the value given to
## PCRE2_CODE_UNIT_WIDTH on the compiler command line.
## 8-bit library.
if WITH_PCRE2_8
lib_LTLIBRARIES += libpcre2-8.la
libpcre2_8_la_SOURCES = \
$(COMMON_SOURCES)
nodist_libpcre2_8_la_SOURCES = \
$(NODIST_SOURCES)
libpcre2_8_la_CFLAGS = \
-DPCRE2_CODE_UNIT_WIDTH=8 \
$(VISIBILITY_CFLAGS) \
$(CET_CFLAGS) \
$(AM_CFLAGS)
libpcre2_8_la_LIBADD =
endif # WITH_PCRE2_8
## 16-bit library.
if WITH_PCRE2_16
lib_LTLIBRARIES += libpcre2-16.la
libpcre2_16_la_SOURCES = \
$(COMMON_SOURCES)
nodist_libpcre2_16_la_SOURCES = \
$(NODIST_SOURCES)
libpcre2_16_la_CFLAGS = \
-DPCRE2_CODE_UNIT_WIDTH=16 \
$(VISIBILITY_CFLAGS) \
$(CET_CFLAGS) \
$(AM_CFLAGS)
libpcre2_16_la_LIBADD =
endif # WITH_PCRE2_16
## 32-bit library.
if WITH_PCRE2_32
lib_LTLIBRARIES += libpcre2-32.la
libpcre2_32_la_SOURCES = \
$(COMMON_SOURCES)
nodist_libpcre2_32_la_SOURCES = \
$(NODIST_SOURCES)
libpcre2_32_la_CFLAGS = \
-DPCRE2_CODE_UNIT_WIDTH=32 \
$(VISIBILITY_CFLAGS) \
$(CET_CFLAGS) \
$(AM_CFLAGS)
libpcre2_32_la_LIBADD =
endif # WITH_PCRE2_32
# The pcre2_chartables.c.dist file is the default version of
# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified.
EXTRA_DIST += src/pcre2_chartables.c.dist
CLEANFILES += src/pcre2_chartables.c
# The JIT compiler lives in a separate directory, but its files are #included
# when pcre2_jit_compile.c is processed, so they must be distributed.
EXTRA_DIST += \
deps/sljit/sljit_src/sljitConfig.h \
deps/sljit/sljit_src/sljitConfigCPU.h \
deps/sljit/sljit_src/sljitConfigInternal.h \
deps/sljit/sljit_src/sljitLir.c \
deps/sljit/sljit_src/sljitLir.h \
deps/sljit/sljit_src/sljitNativeARM_32.c \
deps/sljit/sljit_src/sljitNativeARM_64.c \
deps/sljit/sljit_src/sljitNativeARM_T2_32.c \
deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c \
deps/sljit/sljit_src/sljitNativeMIPS_32.c \
deps/sljit/sljit_src/sljitNativeMIPS_64.c \
deps/sljit/sljit_src/sljitNativeMIPS_common.c \
deps/sljit/sljit_src/sljitNativePPC_32.c \
deps/sljit/sljit_src/sljitNativePPC_64.c \
deps/sljit/sljit_src/sljitNativePPC_common.c \
deps/sljit/sljit_src/sljitNativeRISCV_32.c \
deps/sljit/sljit_src/sljitNativeRISCV_64.c \
deps/sljit/sljit_src/sljitNativeRISCV_common.c \
deps/sljit/sljit_src/sljitNativeS390X.c \
deps/sljit/sljit_src/sljitNativeX86_32.c \
deps/sljit/sljit_src/sljitNativeX86_64.c \
deps/sljit/sljit_src/sljitNativeX86_common.c \
deps/sljit/sljit_src/sljitSerialize.c \
deps/sljit/sljit_src/sljitUtils.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c \
deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c \
deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c \
deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c \
deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c
# Some of the JIT sources are also in separate files that are #included.
EXTRA_DIST += \
src/pcre2_jit_match.c \
src/pcre2_jit_misc.c
## Linker flags for each enabled library come from the matching
## EXTRA_LIBPCRE2_*_LDFLAGS variable, which is not assigned in this part of
## the file (presumably substituted by configure - confirm).
if WITH_PCRE2_8
libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS)
endif # WITH_PCRE2_8
if WITH_PCRE2_16
libpcre2_16_la_LDFLAGS = $(EXTRA_LIBPCRE2_16_LDFLAGS)
endif # WITH_PCRE2_16
if WITH_PCRE2_32
libpcre2_32_la_LDFLAGS = $(EXTRA_LIBPCRE2_32_LDFLAGS)
endif # WITH_PCRE2_32
## With Valgrind support enabled, append VALGRIND_CFLAGS to the compile flags
## of every enabled library.
if WITH_VALGRIND
if WITH_PCRE2_8
libpcre2_8_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE2_8
if WITH_PCRE2_16
libpcre2_16_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE2_16
if WITH_PCRE2_32
libpcre2_32_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE2_32
endif # WITH_VALGRIND
## With coverage enabled, append GCOV_CFLAGS to the compile flags of every
## enabled library.
if WITH_GCOV
if WITH_PCRE2_8
libpcre2_8_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE2_8
if WITH_PCRE2_16
libpcre2_16_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE2_16
if WITH_PCRE2_32
libpcre2_32_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE2_32
endif # WITH_GCOV
## A version of the 8-bit library that has a POSIX API.
## libpcre2-posix is only defined when the 8-bit library is enabled: it is an
## installed libtool library compiled from src/pcre2posix.c with an 8-bit code
## unit width, and it links against libpcre2-8.la.
if WITH_PCRE2_8
lib_LTLIBRARIES += libpcre2-posix.la
libpcre2_posix_la_SOURCES = src/pcre2posix.c
libpcre2_posix_la_CFLAGS = \
-DPCRE2_CODE_UNIT_WIDTH=8 @PCRE2POSIX_CFLAG@ \
$(VISIBILITY_CFLAGS) $(AM_CFLAGS)
libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
libpcre2_posix_la_LIBADD = libpcre2-8.la
## Coverage builds also compile the wrapper with GCOV_CFLAGS.
if WITH_GCOV
libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_GCOV
endif # WITH_PCRE2_8
## Build pcre2grep and optional fuzzer stuff if the 8-bit library is enabled
## pcre2grep is an installed program built from src/pcre2grep.c. Its link line
## lists LIBZ and LIBBZ2 first, followed by libpcre2-8.la.
if WITH_PCRE2_8
bin_PROGRAMS += pcre2grep
pcre2grep_SOURCES = src/pcre2grep.c
pcre2grep_CFLAGS = $(AM_CFLAGS)
pcre2grep_LDADD = $(LIBZ) $(LIBBZ2)
pcre2grep_LDADD += libpcre2-8.la
## Coverage builds add the gcov compile flags and libraries.
if WITH_GCOV
pcre2grep_CFLAGS += $(GCOV_CFLAGS)
pcre2grep_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_PCRE2_8
## If fuzzer support is enabled, build a non-distributed library containing the
## fuzzing function. Also build the standalone checking binary from the same
## source but using -DSTANDALONE.
## For each enabled code unit width this section defines two things, both
## compiled from src/pcre2_fuzzsupport.c:
##   - an uninstalled static library under .libs/ (libpcre2-fuzzsupport.a for
##     8-bit, libpcre2-fuzzsupport-16.a and -32.a for the other widths);
##   - an uninstalled pcre2fuzzcheck-N program, compiled with -DSTANDALONE and
##     linked against the matching libpcre2-N.la.
## NOTE(review): the 8-bit variants pass no PCRE2_CODE_UNIT_WIDTH, unlike the
## 16- and 32-bit ones - presumably the source defaults to 8; confirm.
if WITH_FUZZ_SUPPORT
noinst_LIBRARIES =
if WITH_PCRE2_8
noinst_LIBRARIES += .libs/libpcre2-fuzzsupport.a
_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c
_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS)
_libs_libpcre2_fuzzsupport_a_LIBADD =
noinst_PROGRAMS += pcre2fuzzcheck-8
pcre2fuzzcheck_8_SOURCES = src/pcre2_fuzzsupport.c
pcre2fuzzcheck_8_CFLAGS = -DSTANDALONE $(AM_CFLAGS)
pcre2fuzzcheck_8_LDADD = libpcre2-8.la
if WITH_GCOV
pcre2fuzzcheck_8_CFLAGS += $(GCOV_CFLAGS)
pcre2fuzzcheck_8_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_PCRE2_8
if WITH_PCRE2_16
noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-16.a
_libs_libpcre2_fuzzsupport_16_a_SOURCES = src/pcre2_fuzzsupport.c
_libs_libpcre2_fuzzsupport_16_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16
_libs_libpcre2_fuzzsupport_16_a_LIBADD =
noinst_PROGRAMS += pcre2fuzzcheck-16
pcre2fuzzcheck_16_SOURCES = src/pcre2_fuzzsupport.c
pcre2fuzzcheck_16_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16
pcre2fuzzcheck_16_LDADD = libpcre2-16.la
if WITH_GCOV
pcre2fuzzcheck_16_CFLAGS += $(GCOV_CFLAGS)
pcre2fuzzcheck_16_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_PCRE2_16
if WITH_PCRE2_32
noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-32.a
_libs_libpcre2_fuzzsupport_32_a_SOURCES = src/pcre2_fuzzsupport.c
_libs_libpcre2_fuzzsupport_32_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32
_libs_libpcre2_fuzzsupport_32_a_LIBADD =
noinst_PROGRAMS += pcre2fuzzcheck-32
pcre2fuzzcheck_32_SOURCES = src/pcre2_fuzzsupport.c
pcre2fuzzcheck_32_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32
pcre2fuzzcheck_32_LDADD = libpcre2-32.la
if WITH_GCOV
pcre2fuzzcheck_32_CFLAGS += $(GCOV_CFLAGS)
pcre2fuzzcheck_32_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_PCRE2_32
endif # WITH_FUZZ_SUPPORT
## -------- Testing ----------
## If the 8-bit library is enabled, build the POSIX wrapper test program and
## arrange for it to run.
## pcre2posix_test is an uninstalled program that is also run by 'make check'.
## It links the POSIX wrapper library together with the 8-bit library, so it
## is only defined when the 8-bit library is enabled.
if WITH_PCRE2_8
TESTS += pcre2posix_test
noinst_PROGRAMS += pcre2posix_test
pcre2posix_test_SOURCES = src/pcre2posix_test.c
pcre2posix_test_CFLAGS = $(AM_CFLAGS) @PCRE2POSIX_CFLAG@
pcre2posix_test_LDADD = libpcre2-posix.la libpcre2-8.la
## Coverage builds: instrument the test program and link the gcov libraries,
## exactly as is done for the other programs that link the instrumented
## libraries (pcre2grep, pcre2fuzzcheck-*, pcre2_jit_test, pcre2test).
if WITH_GCOV
pcre2posix_test_CFLAGS += $(GCOV_CFLAGS)
pcre2posix_test_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_PCRE2_8
## If JIT support is enabled, arrange for the JIT test program to run.
## pcre2_jit_test is an uninstalled program that is also run by 'make check'.
## Its link line starts empty and gains one library per enabled code unit
## width.
if WITH_JIT
TESTS += pcre2_jit_test
noinst_PROGRAMS += pcre2_jit_test
pcre2_jit_test_SOURCES = src/pcre2_jit_test.c
pcre2_jit_test_CFLAGS = $(AM_CFLAGS)
pcre2_jit_test_LDADD =
if WITH_PCRE2_8
pcre2_jit_test_LDADD += libpcre2-8.la
endif # WITH_PCRE2_8
if WITH_PCRE2_16
pcre2_jit_test_LDADD += libpcre2-16.la
endif # WITH_PCRE2_16
if WITH_PCRE2_32
pcre2_jit_test_LDADD += libpcre2-32.la
endif # WITH_PCRE2_32
## Coverage builds add the gcov compile flags and libraries.
if WITH_GCOV
pcre2_jit_test_CFLAGS += $(GCOV_CFLAGS)
pcre2_jit_test_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_JIT
# Build the general pcre2test program. The file src/pcre2_printint.c is
# #included by pcre2test as many times as needed, at different code unit
# widths.
## pcre2test is added to the installed programs outside any conditional. Its
## link line starts with LIBREADLINE and gains one library per enabled code
## unit width, plus the POSIX wrapper when the 8-bit library is enabled.
bin_PROGRAMS += pcre2test
## src/pcre2_printint.c is distributed but not listed as a source of its own.
EXTRA_DIST += src/pcre2_printint.c
pcre2test_SOURCES = src/pcre2test.c
pcre2test_CFLAGS = $(AM_CFLAGS)
pcre2test_LDADD = $(LIBREADLINE)
if WITH_PCRE2_8
pcre2test_LDADD += libpcre2-8.la libpcre2-posix.la
endif # WITH_PCRE2_8
if WITH_PCRE2_16
pcre2test_LDADD += libpcre2-16.la
endif # WITH_PCRE2_16
if WITH_PCRE2_32
pcre2test_LDADD += libpcre2-32.la
endif # WITH_PCRE2_32
## Optional extra compile flags for Valgrind support.
if WITH_VALGRIND
pcre2test_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_VALGRIND
## Coverage builds add the gcov compile flags and libraries.
if WITH_GCOV
pcre2test_CFLAGS += $(GCOV_CFLAGS)
pcre2test_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
## The main library tests. Each test is a binary plus a script that runs that
## binary in various ways. We install these test binaries in case folks find it
## helpful. The two .bat files are for running the tests under Windows.
TESTS += RunTest
EXTRA_DIST += RunTest.bat
dist_noinst_SCRIPTS += RunTest
## When the 8-bit library is configured, pcre2grep will have been built.
if WITH_PCRE2_8
TESTS += RunGrepTest
EXTRA_DIST += RunGrepTest.bat
dist_noinst_SCRIPTS += RunGrepTest
endif # WITH_PCRE2_8
## Distribute all the test data files
EXTRA_DIST += \
testdata/grepbinary \
testdata/grepfilelist \
testdata/grepinput \
testdata/grepinput3 \
testdata/grepinput8 \
testdata/grepinputBad8 \
testdata/grepinputBad8_Trail \
testdata/grepinputC.bz2 \
testdata/grepinputC.gz \
testdata/grepinputM \
testdata/grepinputUN \
testdata/grepinputv \
testdata/grepinputx \
testdata/greplist \
testdata/grepnot.bz2 \
testdata/grepoutput \
testdata/grepoutput8 \
testdata/grepoutputC \
testdata/grepoutputCN \
testdata/grepoutputCNU \
testdata/grepoutputCU \
testdata/grepoutputCbz2 \
testdata/grepoutputCgz \
testdata/grepoutputN \
testdata/grepoutputUN \
testdata/greppatN4 \
testdata/testbtables \
testdata/testinput1 \
testdata/testinput2 \
testdata/testinput3 \
testdata/testinput4 \
testdata/testinput5 \
testdata/testinput6 \
testdata/testinput7 \
testdata/testinput8 \
testdata/testinput9 \
testdata/testinput10 \
testdata/testinput11 \
testdata/testinput12 \
testdata/testinput13 \
testdata/testinput14 \
testdata/testinput15 \
testdata/testinput16 \
testdata/testinput17 \
testdata/testinput18 \
testdata/testinput19 \
testdata/testinput20 \
testdata/testinput21 \
testdata/testinput22 \
testdata/testinput23 \
testdata/testinput24 \
testdata/testinput25 \
testdata/testinput26 \
testdata/testinput27 \
testdata/testinputEBC \
testdata/testinputheap \
testdata/testoutput1 \
testdata/testoutput2 \
testdata/testoutput3 \
testdata/testoutput3A \
testdata/testoutput3B \
testdata/testoutput4 \
testdata/testoutput5 \
testdata/testoutput6 \
testdata/testoutput7 \
testdata/testoutput8-16-2 \
testdata/testoutput8-16-3 \
testdata/testoutput8-16-4 \
testdata/testoutput8-32-2 \
testdata/testoutput8-32-3 \
testdata/testoutput8-32-4 \
testdata/testoutput8-8-2 \
testdata/testoutput8-8-3 \
testdata/testoutput8-8-4 \
testdata/testoutput9 \
testdata/testoutput10 \
testdata/testoutput11-16 \
testdata/testoutput11-32 \
testdata/testoutput12-16 \
testdata/testoutput12-32 \
testdata/testoutput13 \
testdata/testoutput14-16 \
testdata/testoutput14-32 \
testdata/testoutput14-8 \
testdata/testoutput15 \
testdata/testoutput16 \
testdata/testoutput17 \
testdata/testoutput18 \
testdata/testoutput19 \
testdata/testoutput20 \
testdata/testoutput21 \
testdata/testoutput22-16 \
testdata/testoutput22-32 \
testdata/testoutput22-8 \
testdata/testoutput23 \
testdata/testoutput24 \
testdata/testoutput25 \
testdata/testoutput26 \
testdata/testoutput27 \
testdata/testoutputEBC \
testdata/testoutputheap-16 \
testdata/testoutputheap-32 \
testdata/testoutputheap-8 \
testdata/valgrind-jit.supp \
testdata/wintestinput3 \
testdata/wintestoutput3 \
perltest.sh
# RunTest and RunGrepTest should clean up after themselves, but just in case
# they don't, add their working files to CLEANFILES.
CLEANFILES += \
testSinput \
test3input \
test3output \
test3outputA \
test3outputB \
testtry \
teststdout \
teststderr \
teststderrgrep \
testtemp1grep \
testtemp2grep \
testtrygrep \
testNinputgrep
## ------------ End of testing -------------
# PCRE2 demonstration program. Not built automatically. The point is that the
# users should build it themselves. So just distribute the source.
EXTRA_DIST += src/pcre2demo.c
# We have .pc files for pkg-config users.
## The .pc files are installed into the pkgconfig subdirectory of libdir. The
## list starts empty and gains the files that match each enabled library; the
## POSIX wrapper's file is added together with the 8-bit one.
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA =
if WITH_PCRE2_8
pkgconfig_DATA += libpcre2-8.pc libpcre2-posix.pc
endif # WITH_PCRE2_8
if WITH_PCRE2_16
pkgconfig_DATA += libpcre2-16.pc
endif # WITH_PCRE2_16
if WITH_PCRE2_32
pkgconfig_DATA += libpcre2-32.pc
endif # WITH_PCRE2_32
# gcov/lcov code coverage reporting
#
# Coverage reporting targets:
#
# coverage: Create a coverage report from 'make check'
# coverage-baseline: Capture baseline coverage information
# coverage-reset: This zeros the coverage counters only
# coverage-report: This creates the coverage report only
# coverage-clean-report: This removes the generated coverage report
# without cleaning the coverage data itself
# coverage-clean-data: This removes the captured coverage data without
# removing the coverage files created at compile time (*.gcno)
# coverage-clean: This cleans all coverage data including the generated
# coverage report.
if WITH_GCOV
## Names used for the lcov test name, the lcov tracefile and the genhtml
## output directory. The two *_EXTRA_FLAGS variables are empty by default and
## are appended to the lcov capture and genhtml command lines respectively.
COVERAGE_TEST_NAME = $(PACKAGE)
COVERAGE_NAME = $(PACKAGE)-$(VERSION)
COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info
COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage
COVERAGE_LCOV_EXTRA_FLAGS =
COVERAGE_GENHTML_EXTRA_FLAGS =
## coverage_quiet expands to --quiet when V is 0, or when V is unset and
## AM_DEFAULT_VERBOSITY is 0; otherwise it expands to nothing.
coverage_quiet = $(coverage_quiet_$(V))
coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY))
coverage_quiet_0 = --quiet
# Run the test suite. The leading '-' makes make ignore a failing exit status
# and -k keeps going after individual failures.
coverage-check: all
	-$(MAKE) $(AM_MAKEFLAGS) -k check
# Capture initial (baseline) coverage data for the build tree.
coverage-baseline:
	$(LCOV) $(coverage_quiet) \
		--directory $(top_builddir) \
		--output-file "$(COVERAGE_OUTPUT_FILE)" \
		--capture \
		--initial
# Capture the counters into a temporary tracefile, strip the entries for
# /tmp, /usr/include and the configured include directory into the final
# tracefile, delete the temporary file, then render the HTML report.
coverage-report:
	$(LCOV) $(coverage_quiet) \
		--directory $(top_builddir) \
		--capture \
		--output-file "$(COVERAGE_OUTPUT_FILE).tmp" \
		--test-name "$(COVERAGE_TEST_NAME)" \
		--no-checksum \
		--compat-libtool \
		$(COVERAGE_LCOV_EXTRA_FLAGS)
	$(LCOV) $(coverage_quiet) \
		--directory $(top_builddir) \
		--output-file "$(COVERAGE_OUTPUT_FILE)" \
		--remove "$(COVERAGE_OUTPUT_FILE).tmp" \
		"/tmp/*" \
		"/usr/include/*" \
		"$(includedir)/*"
	-@rm -f "$(COVERAGE_OUTPUT_FILE).tmp"
	LANG=C $(GENHTML) $(coverage_quiet) \
		--prefix $(top_builddir) \
		--output-directory "$(COVERAGE_OUTPUT_DIR)" \
		--title "$(PACKAGE) $(VERSION) Code Coverage Report" \
		--show-details "$(COVERAGE_OUTPUT_FILE)" \
		--legend \
		$(COVERAGE_GENHTML_EXTRA_FLAGS)
	@echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html"
# Zero the coverage counters (errors ignored).
coverage-reset:
	-$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir)
# Remove the tracefiles and the generated HTML report directory.
coverage-clean-report:
	-rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp"
	-rm -rf "$(COVERAGE_OUTPUT_DIR)"
# Remove the *.gcda files found under the build tree.
coverage-clean-data:
	-find $(top_builddir) -name "*.gcda" -delete
# Full clean: everything above plus the *.gcno files.
coverage-clean: coverage-reset coverage-clean-report coverage-clean-data
	-find $(top_builddir) -name "*.gcno" -delete
coverage-distclean: coverage-clean
# Top-level target: reset, baseline, run the tests, then build the report.
coverage: coverage-reset coverage-baseline coverage-check coverage-report
# Hook the coverage cleanup into the standard clean and distclean targets.
clean-local: coverage-clean
distclean-local: coverage-distclean
.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean
# Without coverage support, still arrange for 'make distclean' to get rid of
# any coverage files that may have been left from a different configuration.
else
# Declare the fallback target phony as well, so that a file or directory that
# happens to be named "coverage" cannot make 'make coverage' a silent no-op.
.PHONY: coverage
coverage:
	@echo "Configuring with --enable-coverage is required to generate code coverage report."
DISTCLEANFILES += src/*.gcda src/*.gcno
distclean-local:
	rm -rf $(PACKAGE)-$(VERSION)-coverage*
endif # WITH_GCOV
## CMake support
EXTRA_DIST += \
cmake/COPYING-CMAKE-SCRIPTS \
cmake/FindEditline.cmake \
cmake/FindReadline.cmake \
cmake/pcre2-config-version.cmake.in \
cmake/pcre2-config.cmake.in \
CMakeLists.txt \
config-cmake.h.in
## end Makefile.am

578
libs/wxWidgets-3.3.1/3rdparty/pcre/NEWS vendored Normal file
View File

@@ -0,0 +1,578 @@
News about PCRE2 releases
-------------------------
Version 10.45 05-February-2025
------------------------------
This is a comparatively large release, incorporating new features, some
bugfixes, and a few changes with slight backwards compatibility implications.
Please see the ChangeLog and Git log for further details.
Only changes to behaviour, changes to the API, and major changes to the pattern
syntax are described here.
This release is the first to be available as a (signed) Git tag, or
alternatively as a (signed) tarball of the Git tag.
This is also the first release to be made by the new maintainers of PCRE2, and
we would like to thank Philip Hazel, creator and maintainer of PCRE and PCRE2.
* (Git change) The sljit project has been split out into a separate Git
repository. Git users must now run `git submodule init; git submodule update`
after a Git checkout.
* (Behaviour change) Update Unicode support to UCD 16.
* (Match behaviour change) Case-insensitive matching of Unicode properties
Ll, Lt, and Lu has been changed to match Perl. Previously, /\p{Ll}/i would
match only lower-case characters (even though case-insensitive matching was
specified). This also affects case-insensitive matching of POSIX classes such
as [:lower:].
* (Minor match behaviour change) Case-insensitive matching of backreferences now
respects the PCRE2_EXTRA_CASELESS_RESTRICT option.
* (Minor pattern syntax change) Parsing of the \x escape is stricter: \x is
no longer treated as an escape for the NUL character when it is not followed
by '{' or a hexadecimal digit. Use \x00 instead.
* (Major new feature) Add a new feature called scan substring. This is a new
type of assertion which matches the content of a capturing block to a
sub-pattern.
Example: to find a word that contains the rare (in English) sequence of
letters "rh" not at the start:
\b(\w++)(*scan_substring:(1).+rh)
The first group captures a word which is then scanned by the
(*scan_substring:(1) ... ) assertion, which tests whether the pattern ".+rh"
matches the capture group "(1)".
* (Major new feature) Add support for UTS#18 compatible character classes,
using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a
metacharacter within character classes and the operators '&&', '--' and '~~',
allowing subtractions and intersections of character classes to be easily
expressed.
Example: to match Thai or Greek letters (but neither letters from other
scripts nor non-letter characters in those scripts), use
[\p{L}&&[\p{Thai}||\p{Greek}]].
* (Major new feature) Add support for Perl-style extended character classes,
using the syntax (?[...]). This also allows expressing subtractions and
intersections of character classes, but using a different syntax to UTS#18.
Example: to match Thai or Greek letters (but not letters in other scripts, nor
non-letter characters in those scripts), use (?[\p{L} & (\p{Thai} + \p{Greek})]).
* (Minor feature) Significant improvements to the character class match engine.
Compiled character classes are now more compact, and have faster matching
for large or complex character sets, using binary search through the set.
* JIT compilation now fails with the new error code PCRE2_ERROR_JIT_UNSUPPORTED
for patterns which use features not supported by the JIT compiler.
* (Minor feature) New options PCRE2_EXTRA_NO_BS0 (disallow \0 as an escape for
the NUL character); PCRE2_EXTRA_PYTHON_OCTAL (use Python disambiguation rules
for deciding whether \12 is a backreference or an octal escape);
PCRE2_EXTRA_NEVER_CALLOUT (disable callout syntax entirely);
PCRE2_EXTRA_TURKISH_CASING (use Turkish rules for case-insensitive matching).
* (Minor feature) Add new API function pcre2_set_optimize() for controlling
which optimizations are enabled.
* (Minor new features) A variety of extensions have been made to
pcre2_substitute() and its syntax for replacement strings. These now support:
\123 octal escapes; titlecasing \u\L; \1 backreferences; \g<1> and $<NAME>
backreferences; $& $` $' and $_; new function
pcre2_set_substitute_case_callout() to allow locale-aware case transformation.
Version 10.44 07-June-2024
--------------------------
This is mostly a bug-fix and tidying release. There is one new function, to set
a maximum size for a compiled pattern. The maximum name length for groups is
increased to 128. Some auxiliary files for building under VMS are added.
Version 10.43 16-February-2024
------------------------------
There are quite a lot of changes in this release (see ChangeLog and Git log for
a list). Those that are not bugfixes or code tidies are:
* The JIT code no longer supports ARMv5 architecture.
* A new function pcre2_get_match_data_heapframes_size() for finer heap control.
* New option flags to restrict the interaction between ASCII and non-ASCII
characters for caseless matching and \d and friends. There are also new
pattern constructs to control these flags from within a pattern.
* Upgrade to Unicode 15.0.0.
* Treat a NULL pattern with zero length as an empty string.
* Added support for limited-length variable-length lookbehind assertions, with
a default maximum length of 255 characters (same as Perl) but with a function
to adjust the limit.
* Support for LoongArch in JIT.
* Perl changed the meaning of (for example) {,3} which was previously not
recognized as a quantifier. Now it means {0,3} and PCRE2 has also changed.
Note that {,} is still not a quantifier.
* Following Perl, allow spaces and tabs after { and before } in all Perl-
compatible items that use braces, and also around commas in quantifiers. The
one exception in PCRE2 is \u{...}, which is from ECMAScript, not Perl, and
PCRE2 follows ECMAScript usage.
* Changed the meaning of \w and its synonyms and derivatives (\b and \B) in UCP
mode to follow Perl. It now matches characters whose general categories are L
or N or whose particular categories are Mn (non-spacing mark) or Pc
(connector punctuation).
* Changed the default meaning of [:xdigit:] in UCP mode to follow Perl. It now
matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can
be used to keep it ASCII only.
* Make PCRE2_UCP the default in UTF mode in pcre2grep and add --no-ucp,
--case-restrict and --posix-digit.
* Add --group-separator and --no-group-separator to pcre2grep.
Version 10.42 11-December-2022
------------------------------
This is an unexpectedly early release to fix a problem that was introduced in
10.41. ChangeLog number 19 (GitHub #139) added the default definition of
PCRE2_CALL_CONVENTION to pcre2posix.c instead of pcre2posix.h, which meant that
programs including pcre2posix.h but not pcre2.h couldn't compile. A new test
that checks this case has been added.
A couple of other minor issues are also fixed, and a patch for an intermittent
JIT fault is also included. See ChangeLog and the Git log.
Version 10.41 06-December-2022
------------------------------
This is another mainly bug-fixing and code-tidying release. There is one
significant upgrade to pcre2grep: it now behaves like GNU grep when matching
more than one pattern and a later pattern matches at an earlier point in the
subject when the matched substrings are being identified by colour or by
offsets.
Version 10.40 15-April-2022
---------------------------
This is mostly a bug-fixing and code-tidying release. However, there are some
extensions to Unicode property handling:
* Added support for Bidi_Class and a number of binary Unicode properties,
including Bidi_Control.
* A number of changes to script matching for \p and \P:
(a) Script extensions for a character are now coded as a bitmap instead of
a list of script numbers, which should be faster and does not need a
loop.
(b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
sc and scx).
(c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
the same as \p{scx:scriptname} because this change happened in Perl at
release 5.26.
(d) The standard Unicode 4-letter abbreviations for script names are now
recognized.
(e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
hyphens, and underscores are ignored in property names, which are then
matched independent of case.
As always, see ChangeLog for a list of all changes (also the Git log).
Version 10.39 29-October-2021
-----------------------------
This release is happening soon after 10.38 because the bug fix is important.
1. Fix incorrect detection of alternatives in first character search in JIT.
2. Update to Unicode 14.0.0.
3. Some code cleanups (see ChangeLog).
Version 10.38 01-October-2021
-----------------------------
As well as some bug fixes and tidies (as always, see ChangeLog for details),
the documentation is updated to list the new URLs, following the move of the
source repository to GitHub and the mailing list to Google Groups.
* The CMake build system can now build both static and shared libraries in one
go.
* Following Perl's lead, \K is now locked out in lookaround assertions by
default, but an option is provided to re-enable the previous behaviour.
Version 10.37 26-May-2021
-------------------------
A few more bug fixes and tidies. The only change of real note is the removal of
the actual POSIX names regcomp etc. from the POSIX wrapper library because
these have caused issues for some applications (see 10.33 #2 below).
Version 10.36 04-December-2020
------------------------------
Again, mainly bug fixes and tidies. The only enhancements are the addition of
GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the
handling of substitution strings for both -O and callouts in pcre2grep, with
the addition of $x{...} and $o{...} to allow for characters whose code points
are greater than 255 in Unicode mode.
NOTE: there is an outstanding issue with JIT support for MacOS on arm64
hardware. For details, please see Bugzilla issue #2618.
Version 10.35 15-April-2020
---------------------------
Bugfixes, tidies, and a few new enhancements.
1. Capturing groups that contain recursive backreferences to themselves are no
longer automatically atomic, because the restriction is no longer necessary
as a result of the 10.30 restructuring.
2. Several new options for pcre2_substitute().
3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode
character properties are used for upper/lower case computations on characters
whose code points are greater than 127.
4. The character tables (for low-valued characters) can now more easily be
saved and restored in binary.
5. Updated to Unicode 13.0.0.
Version 10.34 21-November-2019
------------------------------
Another release with a few enhancements as well as bugfixes and tidies. The
main new features are:
1. There is now some support for matching in invalid UTF strings.
2. Non-atomic positive lookarounds are implemented in the pcre2_match()
interpreter, but not in JIT.
3. Added two new functions: pcre2_get_match_data_size() and
pcre2_maketables_free().
4. Upgraded to Unicode 12.1.0.
Version 10.33 16-April-2019
---------------------------
Yet more bugfixes, tidies, and a few enhancements, summarized here (see
ChangeLog for the full list):
1. Callouts from pcre2_substitute() are now available.
2. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
names are defined as macros. This should help avoid linking with the wrong
library in some environments, while still exporting the POSIX names for
pre-existing programs that use them.
3. Some new options:
(a) PCRE2_EXTRA_ESCAPED_CR_IS_LF makes \r behave as \n.
(b) PCRE2_EXTRA_ALT_BSUX enables support for ECMAScript 6's \u{hh...}
construct.
(c) PCRE2_COPY_MATCHED_SUBJECT causes a copy of a matched subject to be
made, instead of just remembering a pointer.
4. Some new Perl features:
(a) Perl 5.28's experimental alphabetic names for atomic groups and
lookaround assertions, for example, (*pla:...) and (*atomic:...).
(b) The new Perl "script run" features (*script_run:...) and
(*atomic_script_run:...) aka (*sr:...) and (*asr:...).
(c) When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in
capture group names.
5. --disable-percent-zt disables the use of %zu and %td in formatting strings
in pcre2test. They were already automatically disabled for VC and older C
compilers.
6. Some changes related to callouts in pcre2grep:
(a) Support for running an external program under VMS has been added, in
addition to Windows and fork() support.
(b) --disable-pcre2grep-callout-fork restricts the callout support in
pcre2grep to the inbuilt echo facility.
Version 10.32 10-September-2018
-------------------------------
This is another mainly bugfix and tidying release with a few minor
enhancements. These are the main ones:
1. pcre2grep now supports the inclusion of binary zeros in patterns that are
read from files via the -f option.
2. ./configure now supports --enable-jit=auto, which automatically enables JIT
if the hardware supports it.
3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for
local workspace and local ovectors. Instead, an initial block of stack is
reserved, but if this is insufficient, heap memory is used. The heap limit
parameter now applies to pcre2_dfa_match().
4. Updated to Unicode version 11.0.0.
5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
6. Added support for \N{U+dddd}, but only in Unicode mode.
7. Added support for (?^) to unset all imnsx options.
Version 10.31 12-February-2018
------------------------------
This is mainly a bugfix and tidying release (see ChangeLog for full details).
However, there are some minor enhancements.
1. New pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and
PCRE2_CONFIG_COMPILED_WIDTHS.
2. New pcre2_pattern_info() option PCRE2_INFO_EXTRAOPTIONS to retrieve the
extra compile time options.
3. There are now public names for all the pcre2_compile() error numbers.
4. Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new
field callout_flags in callout blocks.
Version 10.30 14-August-2017
----------------------------
The full list of changes that includes bugfixes and tidies is, as always, in
ChangeLog. These are the most important new features:
1. The main interpreter, pcre2_match(), has been refactored into a new version
that does not use recursive function calls (and therefore the system stack) for
remembering backtracking positions. This makes --disable-stack-for-recursion a
NOOP. The new implementation allows backtracking into recursive group calls in
patterns, making it more compatible with Perl, and also fixes some other
previously hard-to-do issues. For patterns that have a lot of backtracking, the
heap is now used, and there is an explicit limit on the amount, settable by
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). The "recursion limit" is retained,
but is renamed as "depth limit" (though the old names remain for
compatibility).
There is also a change in the way callouts from pcre2_match() are handled. The
offset_vector field in the callout block is no longer a pointer to the
actual ovector that was passed to the matching function in the match data
block. Instead it points to an internal ovector of a size large enough to hold
all possible captured substrings in the pattern.
2. The new option PCRE2_ENDANCHORED insists that a pattern match must end at
the end of the subject.
3. The new option PCRE2_EXTENDED_MORE implements Perl's /xx feature, and
pcre2test is upgraded to support it. Setting within the pattern by (?xx) is
also supported.
4. (?n) can be used to set PCRE2_NO_AUTO_CAPTURE, because Perl now has this.
5. Additional compile options in the compile context are now available, and the
first two are: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES and
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
6. The newline type PCRE2_NEWLINE_NUL is now available.
7. The match limit value now also applies to pcre2_dfa_match() as there are
patterns that can use up a lot of resources without necessarily recursing very
deeply.
8. The option REG_PEND (a GNU extension) is now available for the POSIX
wrapper. Also there is a new option PCRE2_LITERAL which is used to support
REG_NOSPEC.
9. PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are implemented for the
benefit of pcre2grep, and pcre2grep's -F, -w, and -x options are re-implemented
using PCRE2_LITERAL, PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This
is tidier and also fixes some bugs.
10. The Unicode tables are upgraded from Unicode 8.0.0 to Unicode 10.0.0.
11. There are some experimental functions for converting foreign patterns
(globs and POSIX patterns) into PCRE2 patterns.
Version 10.23 14-February-2017
------------------------------
1. ChangeLog has the details of a lot of bug fixes and tidies.
2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax
checking is now done in the pre-pass that identifies capturing groups. This has
reduced the amount of duplication and made the code tidier. While doing this,
some minor bugs and Perl incompatibilities were fixed (see ChangeLog for
details.)
3. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the
reference is by name, there is only one group of that name. The referenced
group must, of course, be of fixed length.
4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back
reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does
not recognize this syntax.
5. pcre2grep now automatically expands its buffer up to a maximum set by
--max-buffer-size.
6. The -t option (grand total) has been added to pcre2grep.
7. A new function called pcre2_code_copy_with_tables() exists to copy a
compiled pattern along with a private copy of the character tables that it
uses.
8. A user supplied a number of patches to upgrade pcre2grep under Windows and
tidy the code.
9. Several updates have been made to pcre2test and test scripts (see
ChangeLog).
Version 10.22 29-July-2016
--------------------------
1. ChangeLog has the details of a number of bug fixes.
2. The POSIX wrapper function regcomp() previously did not support back
references and subroutine calls if called with the REG_NOSUB option. It now
does.
3. A new function, pcre2_code_copy(), is added, to make a copy of a compiled
pattern.
4. Support for string callouts is added to pcre2grep.
5. Added the PCRE2_NO_JIT option to pcre2_match().
6. The pcre2_get_error_message() function now returns with a negative error
code if the error number it is given is unknown.
7. Several updates have been made to pcre2test and test scripts (see
ChangeLog).
Version 10.21 12-January-2016
-----------------------------
1. Many bugs have been fixed. A large number of them were provoked only by very
strange pattern input, and were discovered by fuzzers. Some others were
discovered by code auditing. See ChangeLog for details.
2. The Unicode tables have been updated to Unicode version 8.0.0.
3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a
class, where both values are literal letters in the same case, omit the
non-letter EBCDIC code points within the range.
4. There have been a number of enhancements to the pcre2_substitute() function,
giving more flexibility to replacement facilities. It is now also possible to
cause the function to return the needed buffer size if the one given is too
small.
5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such
as (*THEN:name) to be processed for backslashes and to take note of
PCRE2_EXTENDED.
6. PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a
pattern uses \C, and --never-backslash-C makes it possible to compile a version
of PCRE2 in which the use of \C is always forbidden.
7. A limit to the length of pattern that can be handled can now be set by
calling pcre2_set_max_pattern_length().
8. When matching an unanchored pattern, a match can be required to begin within
a given number of code units after the start of the subject by calling
pcre2_set_offset_limit().
9. The pcre2test program has been extended to test new facilities, and it can
now run the tests when LF on its own is not a valid newline sequence.
10. The RunTest script has also been updated to enable more tests to be run.
11. There have been some minor performance enhancements.
Version 10.20 30-June-2015
--------------------------
1. Callouts with string arguments and the pcre2_callout_enumerate() function
have been implemented.
2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added.
3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a
subject in multiline mode.
4. The way named subpatterns are handled has been refactored. The previous
approach had several bugs.
5. The handling of \c in EBCDIC environments has been changed to conform to the
perlebcdic document. This is an incompatible change.
6. Bugs have been mended, many of them discovered by fuzzers.
Version 10.10 06-March-2015
---------------------------
1. Serialization and de-serialization functions have been added to the API,
making it possible to save and restore sets of compiled patterns, though
restoration must be done in the same environment that was used for compilation.
2. The (*NO_JIT) feature has been added; this makes it possible for a pattern
creator to specify that JIT is not to be used.
3. A number of bugs have been fixed. In particular, bugs that caused building
on Windows using CMake to fail have been mended.
Version 10.00 05-January-2015
-----------------------------
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
API, up to item 20 for release 8.36. New programs are recommended to use the
new library. Programs that use the original (PCRE1) API will need changing
before linking with the new library.
****

View File

@@ -0,0 +1,442 @@
Building PCRE2 without using autotools
--------------------------------------
This document contains the following sections:
General
Generic instructions for the PCRE2 C libraries
Stack size in Windows environments
Linking programs in Windows environments
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE2 on Windows with CMake
Building PCRE2 on Windows with Visual Studio
Testing with RunTest.bat
Building PCRE2 on native z/OS and z/VM
Building PCRE2 under VMS
GENERAL
The source of the PCRE2 libraries consists entirely of code written in Standard
C, and so should compile successfully on any system that has a Standard C
compiler and library.
The PCRE2 distribution includes a "configure" file for use by the
configure/make (autotools) build system, as found in many Unix-like
environments. The README file contains information about the options for
"configure".
There is also support for CMake, which some users prefer, especially in Windows
environments, though it can also be run in Unix-like environments. See the
section entitled "Building PCRE2 on Windows with CMake" below.
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
under the names src/config.h.generic and src/pcre2.h.generic. These are
provided for those who build PCRE2 without using "configure" or CMake. If you
use "configure" or CMake, the .generic versions are not used.
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARIES
There are three possible PCRE2 libraries, each handling data with a specific
code unit width: 8, 16, or 32 bits. You can build any combination of them. The
following are generic instructions for building a PCRE2 C library "by hand". If
you are going to use CMake, this section does not apply to you; you can skip
ahead to the CMake section. Note that the settings concerned with 8-bit,
16-bit, and 32-bit code units relate to the type of data string that PCRE2
processes. They are NOT referring to the underlying operating system bit width.
You do not have to do anything special to compile in a 64-bit environment, for
example.
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
macro settings that it contains to whatever is appropriate for your
environment. In particular, you can alter the definition of the NEWLINE
macro to specify what character(s) you want to be interpreted as line
terminators by default. You need to #define at least one of
SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, or SUPPORT_PCRE2_32, depending on which
libraries you are going to build. You must set all that apply.
When you subsequently compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
An alternative approach is not to edit src/config.h, but to use -D on the
compiler command line to make any changes that you need to the
configuration options. In this case -DHAVE_CONFIG_H must not be set.
NOTE: There have been occasions when the way in which certain parameters
in src/config.h are used has changed between releases. (In the
configure/make world, this is handled automatically.) When upgrading to a
new release, you are strongly advised to review src/config.h.generic
before re-using what you had previously.
Note also that the src/config.h.generic file is created from a config.h
that was generated by Autotools, which automatically includes settings of
a number of macros that are not actually used by PCRE2 (for example,
HAVE_DLFCN_H).
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
(3) EITHER:
Copy or rename file src/pcre2_chartables.c.dist as
src/pcre2_chartables.c.
OR:
Compile src/pcre2_dftables.c as a stand-alone program (using
-DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
the single argument "src/pcre2_chartables.c". This generates a set of
standard character tables and writes them to that file. The tables are
generated using the default C locale for your system. If you want to use
a locale that is specified by LC_xxx environment variables, add the -L
option to the pcre2_dftables command. You must use this method if you
are building on a system that uses EBCDIC code.
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
specify alternative tables at run time.
(4) For a library that supports 8-bit code units in the character strings that
it processes, compile the following source files from the src directory,
setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set
-DHAVE_CONFIG_H if you have set up src/config.h with your configuration,
or else use other -D settings to change the configuration as required.
pcre2_auto_possess.c
pcre2_chkdint.c
pcre2_chartables.c
pcre2_compile.c
pcre2_compile_class.c
pcre2_config.c
pcre2_context.c
pcre2_convert.c
pcre2_dfa_match.c
pcre2_error.c
pcre2_extuni.c
pcre2_find_bracket.c
pcre2_jit_compile.c
pcre2_maketables.c
pcre2_match.c
pcre2_match_data.c
pcre2_newline.c
pcre2_ord2utf.c
pcre2_pattern_info.c
pcre2_script_run.c
pcre2_serialize.c
pcre2_string_utils.c
pcre2_study.c
pcre2_substitute.c
pcre2_substring.c
pcre2_tables.c
pcre2_ucd.c
pcre2_valid_utf.c
pcre2_xclass.c
Make sure that you include -I. in the compiler command (or equivalent for
an unusual compiler) so that all included PCRE2 header files are first
sought in the src directory under the current directory. Otherwise you run
the risk of picking up a previously-installed file from somewhere else.
Note that you must compile pcre2_jit_compile.c, even if you have not
defined SUPPORT_JIT in src/config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre2_jit_compile.c #includes other files from the sljit dependency,
all of whose names begin with "sljit". It also #includes
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
those yourself.
Note also that the pcre2_fuzzsupport.c file contains special code that is
useful to those who want to run fuzzing tests on the PCRE2 library. Unless
you are doing that, you can ignore it.
(5) Now link all the compiled code into an object library in whichever form
your system keeps such libraries. This is the PCRE2 C 8-bit library,
typically called something like libpcre2-8. If your system has static and
shared libraries, you may have to do this once for each type.
(6) If you want to build a library that supports 16-bit or 32-bit code units,
set 16 or 32 as the value of -DPCRE2_CODE_UNIT_WIDTH when obeying step 4
above. If you want to build more than one PCRE2 library, repeat steps 4
and 5 as necessary.
(7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the src/pcre2posix.h file and then
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
library. If targeting a DLL in Windows, make sure to include
-DPCRE2POSIX_SHARED with your compiler flags.
(8) The pcre2test program can be linked with any combination of the 8-bit,
16-bit and 32-bit libraries (depending on what you specified in
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
needs the pcre2posix wrapper library.
(9) Run pcre2test on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE2"
in the README file. If you compiled more than one of the 8-bit, 16-bit and
32-bit libraries, you need to run pcre2test with the -16 option to do
16-bit tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
For example, test 4 is for Unicode support, and will not run if you have
built PCRE2 without it. See the comments at the start of each testinput
file. If you have a suitable Unix-like shell, the RunTest script will run
the appropriate tests for you. The command "RunTest list" will output a
list of all the tests.
Note that the supplied files are in Unix format, with just LF characters
as line terminators. You may need to edit them to change this if your
system uses a different convention.
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
by running pcre2test with the -jit option. This is done automatically by
the RunTest script. You might also like to build and run the freestanding
JIT test program, src/pcre2_jit_test.c.
(11) The pcre2test program tests the POSIX wrapper library, but there is also a
freestanding test program in src/pcre2posix_test.c. It must be linked with
both the pcre2posix library and the 8-bit PCRE2 library.
(12) If you want to use the pcre2grep command, compile and link
src/pcre2grep.c; it uses only the 8-bit PCRE2 library (it does not need
the pcre2posix library). If you have built the PCRE2 library with JIT
support by defining SUPPORT_JIT in src/config.h, you can also define
SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless
it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without
defining SUPPORT_JIT, pcre2grep does not try to make use of JIT.
STACK SIZE IN WINDOWS ENVIRONMENTS
Prior to release 10.30 the default system stack size of 1MiB in some Windows
environments caused issues with some tests. This should no longer be the case
for 10.30 and later releases.
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
If you want to statically link a program against a PCRE2 library in the form of
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
It is possible to compile programs to use different calling conventions using
MSVC. Search the web for "calling conventions" for more information. To make it
easier to change the calling convention for the exported functions in a
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
not set, it defaults to empty; the default calling convention is then used
(which is what is wanted most of the time).
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
There are two ways of building PCRE2 using the "configure, make, make install"
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
the same thing; they are completely different from each other. There is also
support for building using CMake, which some users find a more straightforward
way of building PCRE2 under Windows.
The MinGW home page (http://www.mingw.org/) says this:
MinGW: A collection of freely available and freely distributable Windows
specific header files and import libraries combined with GNU toolsets that
allow one to produce native Windows programs that do not rely on any
3rd-party C runtime DLLs.
The Cygwin home page (http://www.cygwin.com/) says this:
Cygwin is a Linux-like environment for Windows. It consists of two parts:
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
substantial Linux API functionality
. A collection of tools which provide Linux look and feel.
On both MinGW and Cygwin, PCRE2 should build correctly using:
./configure && make && make install
This should create two libraries called libpcre2-8 and libpcre2-posix. These
are independent libraries: when you link with libpcre2-posix you must also link
with libpcre2-8, which contains the basic functions.
Using Cygwin's compiler generates libraries and executables that depend on
cygwin1.dll. If a library that is generated this way is distributed,
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
licence, this forces not only PCRE2 to be under the GPL, but also the entire
application. A distributor who wants to keep their own code proprietary must
purchase an appropriate Cygwin licence.
MinGW has no such restrictions. The MinGW compiler generates a library or
executable that can run standalone on Windows without any third party dll or
licensing issues.
But there is more complication:
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
gcc and MinGW's gcc). So, a user can:
. Build native binaries by using MinGW or by getting Cygwin and using
-mno-cygwin.
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
compiler flags.
The test files that are supplied with PCRE2 are in UNIX format, with LF
characters as line terminators. Unless your PCRE2 library uses a default
newline option that includes LF as a valid newline, it may be necessary to
change the line terminators in the test files to get some of the tests to work.
BUILDING PCRE2 ON WINDOWS WITH CMAKE
CMake is an alternative configuration facility that can be used instead of
"configure". CMake creates project files (make files, solution files, etc.)
tailored to numerous development environments, including Visual Studio,
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
If you are using CMake and encounter errors, deleting the CMake cache and
restarting from a fresh build may fix the error. In the CMake GUI, the cache can
be deleted by selecting "File > Delete Cache"; or the folder "CMakeCache" can
be deleted.
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
4. Run CMake.
- Using the CLI, simply run `cmake ..` inside the `build/` directory. You can
use the `ccmake` ncurses GUI to select and configure PCRE2 features.
- Using the CMake GUI:
a) Run cmake-gui from the Shell environment of your build tool, for
example, Msys for Msys/MinGW or Visual Studio Command Prompt for
VC/VC++.
b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
c) Press the "Configure" button.
d) Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
e) The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
f) Press "Configure" again. The adjacent "Generate" button should now be
active.
g) Press "Generate".
5. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
Regardless of build system used, `cmake --build .` will build it.
6. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
Regardless of build system used, `ctest` will run the tests.
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
The code currently cannot be compiled without an inttypes.h header, which is
available only with Visual Studio 2013 or newer. However, this portable and
permissively-licensed implementation of the stdint.h header could be used as an
alternative:
http://www.azillionmonkeys.com/qed/pstdint.h
Just rename it and drop it into the top level of the build tree.
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
RUN_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
on your configuration options, possibly other test programs) in the build
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
exe paths.
For manual testing with RunTest.bat, provided the build dir is a subdirectory
of the source directory: Open a command shell window. Chdir to the location
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
"..\RunTest.bat" or "..\..\RunTest.bat" as appropriate.
To run only a particular test with RunTest.bat, provide a test number argument.
Otherwise:
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
have been created.
2. Edit RunTest.bat to identify the full or relative location of
the pcre2 source (in which the testdata folder resides), e.g.:
set srcdir=C:\pcre2\pcre2-10.00
3. In a Windows command environment, chdir to the location of your bat and
exe programs.
4. Run RunTest.bat. Test outputs will automatically be compared to expected
results, and discrepancies will be identified in the console output.
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
applications can be supported through UNIX System Services, and in such an
environment it should be possible to build PCRE2 in the same way as in other
systems, with the EBCDIC related configuration settings, but it is not known if
anybody has tried this.
In native z/OS (without UNIX System Services) and in z/VM, special ports are
required. For details, please see file 939 on this web site:
http://www.cbttape.org
Everything in that location, source and executable, is in EBCDIC and native
z/OS file formats. The port provides an API for LE languages such as COBOL and
for the z/OS and z/VM versions of the Rexx language.
BUILDING PCRE2 UNDER VMS
Alexey Chuphin has contributed some auxiliary files for building PCRE2 under
OpenVMS. They are in the "vms" directory in the distribution tarball. Please
read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep
programs contain some VMS-specific code.
==============================
Last updated: 26 December 2024
==============================

View File

@@ -0,0 +1,970 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
API. Since its initial release in 2015, there has been further development of
the code and it now differs from PCRE1 in more than just the API. There are new
features, and the internals have been improved. The original PCRE1 library is
now obsolete and no longer maintained. The latest release of PCRE2 is available
in .tar.gz, .tar.bz2, or .zip form from this GitHub repository:
https://github.com/PCRE2Project/pcre2/releases
There is a mailing list for discussion about the development of PCRE2 at
pcre2-dev@googlegroups.com. You can subscribe by sending an email to
pcre2-dev+subscribe@googlegroups.com.
You can access the archives and also subscribe or manage your subscription
here:
https://groups.google.com/g/pcre2-dev
Please read the NEWS file if you are upgrading from a previous release. The
contents of this README file are:
The PCRE2 APIs
Documentation for PCRE2
Building PCRE2 on non-Unix-like systems
Building PCRE2 without using autotools
Building PCRE2 using autotools
Retrieving configuration information
Shared libraries
Cross-compiling using autotools
Making new tarballs
Testing PCRE2
Character tables
File manifest
The PCRE2 APIs
--------------
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there
are no C++ wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
library that are based on the POSIX regular expression API (see the pcre2posix
man page). These are built into a library called libpcre2-posix. Note that this
just provides a POSIX calling interface to PCRE2; the regular expressions
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
and does not give full access to all of PCRE2's facilities.
The header file for the POSIX-style functions is called pcre2posix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE2 with
an existing program that uses the POSIX API, pcre2posix.h will have to be
renamed or pointed at by a link (or the program modified, of course). See the
pcre2posix documentation for more details.
Documentation for PCRE2
-----------------------
If you install PCRE2 in the normal way on a Unix-like system, you will end up
with a set of man pages whose names all start with "pcre2". The one that is
just called "pcre2" lists all the others. In addition to these man pages, the
PCRE2 documentation is supplied in two other forms:
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
doc/pcre2test.txt in the source distribution. The first of these is a
concatenation of the text forms of all the section 3 man pages except the
listing of pcre2demo.c and those that summarize individual functions. The
other two are the text forms of the section 1 man pages for the pcre2grep
and pcre2test commands. These text forms are provided for ease of scanning
with text editors or similar tools. They are installed in
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
(defaulting to /usr/local).
2. A set of files containing all the documentation in HTML form, hyperlinked
in various ways, and rooted in a file called index.html, is distributed in
doc/html and installed in <prefix>/share/doc/pcre2/html.
Building PCRE2 on non-Unix-like systems
---------------------------------------
For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if
your system supports the use of "configure" and "make" you may be able to build
PCRE2 using autotools in the same way as for many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
PCRE2 has been compiled on many different operating systems. It should be
straightforward to build PCRE2 on any system that has a Standard C compiler and
library, because it uses only Standard C functions.
Building PCRE2 without using autotools
--------------------------------------
The use of autotools (in particular, libtool) is problematic in some
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
file for ways of building PCRE2 without using autotools.
Building PCRE2 using autotools
------------------------------
The following instructions assume the use of the widely used "configure; make;
make install" (autotools) process.
If you have downloaded and unpacked a PCRE2 release tarball, run the
"configure" command from the PCRE2 directory, with your current directory set
to the directory where you want the files to be created. This command is a
standard GNU "autoconf" configuration script, for which generic instructions
are supplied in the file INSTALL.
The files in the GitHub repository do not contain "configure". If you have
downloaded the PCRE2 source files from GitHub, before you can run "configure"
you must run the shell script called autogen.sh. This runs a number of
autotools to create a "configure" script (you must of course have the autotools
commands installed in order to do this).
Most commonly, people build PCRE2 within its own distribution directory, and in
this case, on many systems, just running "./configure" is sufficient. However,
the usual methods of changing standard defaults are available. For example:
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
This command specifies that the C compiler should be run with the flags '-O2
-Wall' instead of the default, and that "make install" should install PCRE2
under /opt/local instead of the default /usr/local.
If you want to build in a different directory, just run "configure" with that
directory as current. For example, suppose you have unpacked the PCRE2 source
into /source/pcre2/pcre2-xxx, but you want to build it in
/build/pcre2/pcre2-xxx:
cd /build/pcre2/pcre2-xxx
/source/pcre2/pcre2-xxx/configure
PCRE2 is written in C and is normally compiled as a C library. However, it is
possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this.
There are some optional features that can be included or omitted from the PCRE2
library. They are also documented in the pcre2build man page.
. By default, both shared and static libraries are built. You can change this
by adding one of these options to the "configure" command:
--disable-shared
--disable-static
Setting --disable-shared ensures that PCRE2 libraries are built as static
libraries. The binaries that are then created as part of the build process
(for example, pcre2test and pcre2grep) are linked statically with one or more
PCRE2 libraries, but may also be dynamically linked with other libraries such
as libc. If you want these binaries to be fully statically linked, you can
set LDFLAGS like this:
LDFLAGS=--static ./configure --disable-shared
Note the two hyphens in --static. Of course, this works only if static
versions of all the relevant libraries are available for linking. See also
"Shared libraries" below.
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
the "configure" command, the 16-bit library is also built. If you add
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
to disable building the 8-bit library.
. If you want to include support for just-in-time (JIT) compiling, which can
give large performance improvements on certain platforms, add --enable-jit to
the "configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error. If in doubt, use --enable-jit=auto, which
enables JIT only if the current hardware is supported.
. If you are enabling JIT under an SELinux environment, you may also want to add
--enable-jit-sealloc, which enables the use of an executable memory allocator
that is compatible with SELinux. Warning: this allocator is experimental!
It does not support the fork() operation and may crash when no disk space is
available. This option has no effect if JIT is disabled.
. If you do not want to make use of the default support for UTF-8 Unicode
character strings in the 8-bit library, UTF-16 Unicode character strings in
the 16-bit library, or UTF-32 Unicode character strings in the 32-bit
library, you can add --disable-unicode to the "configure" command. This
reduces the size of the libraries. It is not possible to configure one
library with Unicode support, and another without, in the same configuration.
It is also not possible to use --enable-ebcdic (see below) with Unicode
support, so if this option is set, you must also use --disable-unicode.
When Unicode support is available, the use of a UTF encoding still has to be
enabled by setting the PCRE2_UTF option at run time or starting a pattern
with (*UTF). When PCRE2 is compiled with Unicode support, its input can only
be either ASCII or UTF-8/16/32, even when running on EBCDIC platforms.
As well as supporting UTF strings, Unicode support includes support for the
\P, \p, and \X sequences that recognize Unicode character properties.
However, only a subset of Unicode properties are supported; see the
pcre2pattern man page for details. Escape sequences such as \d and \w in
patterns do not by default make use of Unicode properties, but can be made to
do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
character as indicating the end of a line. Whatever you specify at build time
is the default; the caller of PCRE2 can change the selection at run time. The
default newline indicator is a single LF character (the Unix standard). You
can specify the default newline indicator by adding --enable-newline-is-cr,
--enable-newline-is-lf, --enable-newline-is-crlf,
--enable-newline-is-anycrlf, --enable-newline-is-any, or
--enable-newline-is-nul to the "configure" command, respectively.
. By default, the sequence \R in a pattern matches any Unicode line ending
sequence. This is independent of the option specifying what PCRE2 considers
to be the end of a line (see above). However, the caller of PCRE2 can
restrict \R to match only CR, LF, or CRLF. You can make this the default by
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
. In a pattern, the escape sequence \C matches a single code unit, even in a
UTF mode. This can be dangerous because it breaks up multi-code-unit
characters. You can build PCRE2 with the use of \C permanently locked out by
adding --enable-never-backslash-C (note the upper case C) to the "configure"
command. When \C is allowed by the library, individual applications can lock
it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option.
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
pattern. This limits the amount of system stack that a pattern uses when it
is compiled. The default is 250, but you can change it by setting, for
example,
--with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of computing resource
it uses when matching a pattern. If the limit is exceeded during a match, the
match fails. The default is ten million. You can change the default by
setting, for example,
--with-match-limit=500000
on the "configure" command. This is just the default; individual calls to
pcre2_match() or pcre2_dfa_match() can supply their own value. There is more
discussion in the pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking
(pcre2_match()) or nested function calls (pcre2_dfa_match()) during a
matching process, which indirectly limits the amount of heap memory that is
used, and in the case of pcre2_dfa_match() the amount of stack as well. This
counter also has a default of ten million, which is essentially "unlimited".
You can change the default by setting, for example,
--with-match-limit-depth=5000
There is more discussion in the pcre2api man page (search for
pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
the pcre2_match() and pcre2_dfa_match() interpreters:
--with-heap-limit=500
The units are kibibytes (units of 1024 bytes). This limit does not apply when
the JIT optimization (which has its own memory control features) is used.
There is more discussion on the pcre2api man page (search for
pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around
64 kibibytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets
to different parts of the compiled pattern. In the 16-bit library,
--with-link-size=3 is the same as --with-link-size=4, which (in both
libraries) uses four-byte offsets. Increasing the internal link size reduces
performance in the 8-bit and 16-bit libraries. In the 32-bit library, the
link size setting is ignored, as 4-byte offsets are always used.
. Lookbehind assertions in which one or more branches can match a variable
number of characters are supported only if there is a maximum matching length
for each top-level branch. There is a limit to this maximum that defaults to
255 characters. You can alter this default by a setting such as
--with-max-varlookbehind=100
The limit can be changed at runtime by calling pcre2_set_max_varlookbehind().
Lookbehind assertions in which every branch matches a fixed number of
characters (not necessarily all the same) are not constrained by this limit.
. For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, it uses a set of
tables for ASCII encoding that is part of the distribution. If you specify
--enable-rebuild-chartables
a program called pcre2_dftables is compiled and run in the default C locale
when you obey "make". It builds a source file called pcre2_chartables.c. If
you do not specify this option, pcre2_chartables.c is created as a copy of
pcre2_chartables.c.dist. See "Character tables" below for further
information.
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
character code (as opposed to ASCII/Unicode) by specifying
--enable-ebcdic --disable-unicode
This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
. If you specify --enable-debug, additional debugging code is included in the
build. This option is intended for use by the PCRE2 maintainers.
. In environments where valgrind is installed, if you specify
--enable-valgrind
PCRE2 will use valgrind annotations to mark certain memory regions as
unaddressable. This allows it to detect invalid memory accesses, and is
mostly useful for debugging PCRE2 itself.
. In environments where the gcc compiler is used and lcov is installed, if you
specify
--enable-coverage
the build process implements a code coverage report for the test suite. The
report is generated by running "make coverage". If ccache is installed on
your system, it must be disabled when building PCRE2 for coverage reporting.
You can do this by setting the environment variable CCACHE_DISABLE=1 before
running "make" to build PCRE2. There is more information about coverage
reporting in the "pcre2build" documentation.
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
you add --disable-pcre2grep-jit to the "configure" command.
. There is support for calling external programs during matching in the
pcre2grep command, using PCRE2's callout facility with string arguments. This
support can be disabled by adding --disable-pcre2grep-callout to the
"configure" command. There are two kinds of callout: one that generates
output from inbuilt code, and another that calls an external program. The
latter has special support for Windows and VMS; otherwise it assumes the
existence of the fork() function. This facility can be disabled by adding
--disable-pcre2grep-callout-fork to the "configure" command.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
specifying one or both of
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
Of course, the relevant libraries must be installed on your system.
. The default starting size (in bytes) of the internal buffer used by pcre2grep
can be set by, for example:
--with-pcre2grep-bufsize=51200
The value must be a plain integer. The default is 20480. The amount of memory
used by pcre2grep is actually three times this number, to allow for "before"
and "after" lines. If very long lines are encountered, the buffer is
automatically enlarged, up to a fixed maximum size.
. The default maximum size of pcre2grep's internal buffer can be set by, for
example:
--with-pcre2grep-max-bufsize=2097152
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
whichever is the larger.
. It is possible to compile pcre2test so that it links with the libreadline
or libedit libraries, by specifying, respectively,
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
If this is done, when pcre2test's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licensed, so if you distribute a binary of
pcre2test linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
Enabling libreadline causes the -lreadline option to be added to the
pcre2test build. In many operating environments with a system-installed
readline library this is sufficient. However, in some environments (e.g. if
an unmodified distribution version of readline is in use), it may be
necessary to specify something like LIBS="-lncurses" as well. This is
because, to quote the readline INSTALL, "Readline uses the termcap functions,
but does not link with the termcap or curses library itself, allowing
applications which link with readline the option to choose an appropriate
library." If you get error messages about missing functions tgetstr, tgetent,
tputs, tgetflag, or tgoto, this is the problem, and linking with the ncurses
library should fix it.
. The C99 standard defines formatting modifiers z and t for size_t and
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
environments other than Microsoft Visual Studio versions earlier than 2013
when __STDC_VERSION__ is defined and has a value greater than or equal to
199901L (indicating C99). However, there is at least one environment that
claims to be C99 but does not support these modifiers. If
--disable-percent-zt is specified, no use is made of the z or t modifiers.
Instead of %td or %zu, %lu is used, with a cast for size_t values.
. There is a special option called --enable-fuzz-support for use by people who
want to run fuzzing tests on PCRE2. If set, it causes an extra library
called libpcre2-fuzzsupport.a to be built, but not installed. This contains
a single function called LLVMFuzzerTestOneInput() whose arguments are a
pointer to a string and the length of the string. When called, this function
tries to compile the string as a pattern, and if that succeeds, to match
it. This is done both with no options and with some random option bits that
are generated from the string. Setting --enable-fuzz-support also causes an
executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally
run under valgrind or used when PCRE2 is compiled with address sanitizing
enabled. It calls the fuzzing function and outputs information about what it
is doing. The input strings are specified by arguments: if an argument
starts with "=" the rest of it is a literal input string. Otherwise, it is
assumed to be a file name, and the contents of the file are the test string.
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
which caused pcre2_match() to use individual blocks on the heap for
backtracking instead of recursive function calls (which use the stack). This
is now obsolete because pcre2_match() was refactored always to use the heap
(in a much more efficient way than before). This option is retained for
backwards compatibility, but has no effect other than to output a warning.
The "configure" script builds the following files for the basic C library:
. Makefile the makefile that builds the library
. src/config.h build-time configuration options for the library
. src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure"
. libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc )
. libpcre2-posix.pc )
. libtool script that builds shared and/or static libraries
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
tarballs under the names config.h.generic and pcre2.h.generic. These are
provided for those who have to build PCRE2 without using "configure" or CMake.
If you use "configure" or CMake, the .generic versions are not used.
The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs.
Once "configure" has run, you can run "make". This builds whichever of the
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
program called pcre2test. If you enabled JIT support with --enable-jit, another
test program called pcre2_jit_test is built as well. If the 8-bit library is
built, libpcre2-posix, pcre2posix_test, and the pcre2grep command are also
built. Running "make" with the -j option may speed up compilation on
multiprocessor systems.
The command "make check" runs all the appropriate tests. Details of the PCRE2
tests are given below in a separate section of this document. The -j option of
"make" can also be used when running the tests.
You can use "make install" to install PCRE2 into live directories on your
system. The following are installed (file names are all relative to the
<prefix> that is set when "configure" is run):
Commands (bin):
pcre2test
pcre2grep (if 8-bit support is enabled)
pcre2-config
Libraries (lib):
libpcre2-8 (if 8-bit support is enabled)
libpcre2-16 (if 16-bit support is enabled)
libpcre2-32 (if 32-bit support is enabled)
libpcre2-posix (if 8-bit support is enabled)
Configuration information (lib/pkgconfig):
libpcre2-8.pc
libpcre2-16.pc
libpcre2-32.pc
libpcre2-posix.pc
Header files (include):
pcre2.h
pcre2posix.h
Man pages (share/man/man{1,3}):
pcre2grep.1
pcre2test.1
pcre2-config.1
pcre2.3
pcre2*.3 (lots more pages, all starting "pcre2")
HTML documentation (share/doc/pcre2/html):
index.html
*.html (lots more pages, hyperlinked from index.html)
Text file documentation (share/doc/pcre2):
AUTHORS
COPYING
ChangeLog
LICENCE
NEWS
README
SECURITY
pcre2.txt (a concatenation of the man(3) pages)
pcre2test.txt the pcre2test man page
pcre2grep.txt the pcre2grep man page
pcre2-config.txt the pcre2-config man page
If you want to remove PCRE2 from your system, you can run "make uninstall".
This removes all the files that "make install" installed. However, it does not
remove any directories, because these are often shared with other programs.
Retrieving configuration information
------------------------------------
Running "make install" installs the command pcre2-config, which can be used to
recall information about the PCRE2 configuration and installation. For example:
pcre2-config --version
prints the version number, and
pcre2-config --libs8
outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information
about installed libraries. Instead of separate commands for each library, a
single command is used. For example:
pkg-config --libs libpcre2-16
The data is held in *.pc files that are installed in a directory called
<prefix>/lib/pkgconfig.
Shared libraries
----------------
The default distribution builds PCRE2 as shared libraries and static libraries,
as long as the operating system supports shared libraries. Shared library
support relies on the "libtool" script which is built as part of the
"configure" process.
The libtool script is used to compile and link both shared and static
libraries. They are placed in a subdirectory called .libs when they are newly
built. The programs pcre2test and pcre2grep are built to use these uninstalled
libraries (by means of wrapper scripts in the case of shared libraries). When
you use "make install" to install shared libraries, pcre2grep and pcre2test are
automatically re-built to use the newly installed shared libraries before being
installed themselves. However, the versions left in the build directory still
use the uninstalled libraries.
To build PCRE2 using static libraries only you must use --disable-shared when
configuring it. For example:
./configure --prefix=/usr/gnu --disable-shared
Then run "make" in the usual way. Similarly, you can use --disable-static to
build only shared libraries. Note, however, that when you build only static
libraries, binary programs such as pcre2test and pcre2grep may still be
dynamically linked with other libraries (for example, libc) unless you set
LDFLAGS to --static when running "configure".
Cross-compiling using autotools
-------------------------------
You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE2 for some other host. However, you should NOT
specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
source file is compiled and run on the local host, in order to generate the
inbuilt character tables (the pcre2_chartables.c file). This will probably not
work, because pcre2_dftables.c needs to be compiled with the local compiler,
not the cross compiler.
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
created by making a copy of pcre2_chartables.c.dist, which is a default set of
tables that assumes ASCII code. Cross-compiling with the default tables should
not be a problem.
If you need to modify the character tables when cross-compiling, you should
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
hand and run it on the local host to make a new version of
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
at build time" for more details.
Making new tarballs
-------------------
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.
If you have modified any of the man page sources in the doc directory, you
should first run the maint/PrepareRelease script before making a distribution.
This script creates the .txt and HTML forms of the documentation from the man
pages.
Testing PCRE2
-------------
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
There is another script called RunGrepTest that tests the pcre2grep command.
When the 8-bit library is built, a test program for the POSIX wrapper, called
pcre2posix_test, is compiled, and when JIT support is enabled, a test program
called pcre2_jit_test is built. The scripts and the program tests are all run
when you obey "make check". For other environments, see the instructions in
NON-AUTOTOOLS-BUILD.
The RunTest script runs the pcre2test test program (which is documented in its
own man page) on each of the relevant testinput files in the testdata
directory, and compares the output with the contents of the corresponding
testoutput files. RunTest uses a file called testtry to hold the main output
from pcre2test. Other files whose names begin with "test" are used as working
files in some tests.
Some tests are relevant only when certain build-time options were selected. For
example, the tests for UTF-8/16/32 features are run only when Unicode support
is available. RunTest outputs a comment when it skips a test.
Many (but not all) of the tests that are not skipped are run twice if JIT
support is available. On the second run, JIT compilation is forced. This
testing can be suppressed by putting "-nojit" on the RunTest command line.
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
libraries that are enabled. If you want to run just one set of tests, call
RunTest with either the -8, -16 or -32 option.
If valgrind is installed, you can run the tests under it by putting "-valgrind"
on the RunTest command line. To run pcre2test on just one or more specific test
files, give their numbers as arguments to RunTest, for example:
RunTest 2 7 11
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
end), or a number preceded by ~ to exclude a test. For example:
RunTest 3-15 ~10
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
except test 13. Whatever order the arguments are in, the tests are always run
in numerical order.
You can also call RunTest with the single argument "list" to cause it to output
a list of tests.
The test sequence starts with "test 0", which is a special test that has no
input file, and whose output is not checked. This is because it will be
different on different hardware and with different configurations. The test
exists in order to exercise some of pcre2test's code that would not otherwise
be run.
Tests 1 and 2 can always be run, as they expect only plain text strings (not
UTF) and make no use of Unicode properties. The first test file can be fed
directly into the perltest.sh script to check that Perl gives the same results.
The only difference you should see is in the first few lines, where the Perl
version is given instead of the PCRE2 version. The second set of tests check
auxiliary functions, error detection, and run-time flags that are specific to
PCRE2. It also uses the debugging flags to check some of the internals of
pcre2_compile().
If you build PCRE2 with a locale setting that is not the standard C locale, the
character tables may be different (see next paragraph). In some cases, this may
cause failures in the second set of tests. For example, in a locale where the
isprint() function yields TRUE for characters in the range 128-255, the use of
[:isascii:] inside a character class defines a different set of characters, and
this shows up in this test as a difference in the compiled code, which is being
listed for checking. For example, where the comparison test output contains
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
cases. This is not a bug in PCRE2.
Test 3 checks pcre2_maketables(), the facility for building a set of character
tables for a specific locale and using them instead of the default tables. The
script uses the "locale" command to check for the availability of the "fr_FR",
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
the list of available locales, the third test cannot be run, and a comment is
output to say why. If running this test produces an error like this:
** Failed to set locale "fr_FR"
it means that the given locale is not available on your system, despite being
listed by "locale". This does not mean that PCRE2 is broken. There are three
alternative output files for the third test, because three different versions
of the French locale have been encountered. The test passes if its output
matches any one of them.
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
with the perltest.sh script, and test 5 checking PCRE2-specific things.
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
non-UTF mode and UTF-mode with Unicode property support, respectively.
Test 8 checks some internal offsets and code size features, but it is run only
when Unicode support is enabled. The output is different in 8-bit, 16-bit, and
32-bit modes and for different link sizes, so there are different output files
for each mode and link size.
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
16-bit and 32-bit modes. These are tests that generate different output in
8-bit mode. Each pair are for general cases and Unicode support, respectively.
Test 13 checks the handling of non-UTF characters greater than 255 by
pcre2_dfa_match() in 16-bit and 32-bit modes.
Test 14 contains some special UTF and UCP tests that give different output for
different code unit widths.
Test 15 contains a number of tests that must not be run with JIT. They check,
among other non-JIT things, the match-limiting features of the interpretive
matcher.
Test 16 is run only when JIT support is not available. It checks that an
attempt to use JIT has the expected behaviour.
Test 17 is run only when JIT support is available. It checks JIT complete and
partial modes, match-limiting under JIT, and other JIT-specific features.
Tests 18 and 19 are run only in 8-bit mode. They check the POSIX interface to
the 8-bit library, without and with Unicode support, respectively.
Test 20 checks the serialization functions by writing a set of compiled
patterns to a file, and then reloading and checking them.
Tests 21 and 22 test \C support when the use of \C is not locked out, without
and with UTF support, respectively. Test 23 tests \C when it is locked out.
Tests 24 and 25 test the experimental pattern conversion functions, without and
with UTF support, respectively.
Test 26 checks Unicode property support using tests that are compatible with
Perl. Test 27 checks Unicode property support using tests that are generated
automatically from the Unicode data tables.
Character tables
----------------
For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, a set of tables that is
built into the library is used. The pcre2_maketables() function can be called
by an application to create a new set of tables in the current locale. These are
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
compile context.
The source file called pcre2_chartables.c contains the default set of tables.
By default, this is created as a copy of pcre2_chartables.c.dist, which
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
specified for ./configure, a new version of pcre2_chartables.c is built by the
program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
character handling functions such as isalnum(), isalpha(), isupper(),
islower(), etc. to build the table sources. This means that the default C
locale that is set for your system will control the contents of these default
tables. You can change the default tables by editing pcre2_chartables.c and
then re-building PCRE2. If you do this, you should take care to ensure that the
file does not get automatically re-generated. The best way to do this is to
move pcre2_chartables.c.dist out of the way and replace it with your customized
tables.
When the pcre2_dftables program is run as a result of specifying
--enable-rebuild-chartables, it uses the default C locale that is set on your
system. It does not pay attention to the LC_xxx environment variables. In other
words, it uses the system's default locale rather than whatever the compiling
user happens to have set. If you really do want to build a source set of
character tables in a locale that is specified by the LC_xxx variables, you can
run the pcre2_dftables program by hand with the -L option. For example:
./pcre2_dftables -L pcre2_chartables.c.special
The second argument names the file where the source code for the tables is
written. The first two 256-byte tables provide lower casing and case flipping
functions, respectively. The next table consists of a number of 32-byte bit
maps which identify certain character classes such as digits, "word"
characters, white space, etc. These are used when building 32-byte bit maps
that represent character classes for code points less than 256. The final
256-byte table has bits indicating various character types, as follows:
1 white space character
2 letter
4 lower case letter
8 decimal digit
16 alphanumeric or '_'
You can also specify -b (with or without -L) when running pcre2_dftables. This
causes the tables to be written in binary instead of as source code. A set of
binary tables can be loaded into memory by an application and passed to
pcre2_compile() in the same way as tables created dynamically by calling
pcre2_maketables(). The tables are just a string of bytes, independent of
hardware characteristics such as endianness. This means they can be bundled
with an application that runs in different environments, to ensure consistent
behaviour.
See also the pcre2build section "Creating character tables at build time".
File manifest
-------------
The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
when --enable-rebuild-chartables is specified
src/pcre2_chartables.c.dist a default set of character tables that assume
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_chkdint.c )
src/pcre2_compile.c )
src/pcre2_compile_class.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_ucptables.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2_printint.c debugging function that is used by pcre2test
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
src/config.h.in template for config.h, when built by "configure"
src/pcre2.h.in template for pcre2.h when built by "configure"
src/pcre2posix.h header for the external POSIX wrapper API
src/pcre2_compile.h header for internal use
src/pcre2_internal.h header for internal use
src/pcre2_intmodedep.h a mode-specific internal header
src/pcre2_jit_char_inc.h header used by JIT
src/pcre2_jit_neon_inc.h header used by JIT
src/pcre2_jit_simd_inc.h header used by JIT
src/pcre2_ucp.h header for Unicode property handling
src/pcre2_util.h header for internal utils
deps/sljit/sljit_src/* source files for the JIT compiler
(B) Source files for programs that use PCRE2:
src/pcre2demo.c simple demonstration of coding calls to PCRE2
src/pcre2grep.c source of a grep utility that uses PCRE2
src/pcre2test.c comprehensive test program
src/pcre2_jit_test.c JIT test program
src/pcre2posix_test.c POSIX wrapper API test program
(C) Auxiliary files:
AUTHORS.md information about the authors of PCRE2
ChangeLog log of changes to the code
HACKING some notes about the internals of PCRE2
INSTALL generic installation instructions
LICENCE.md conditions for the use of PCRE2
COPYING the same, using GNU's standard name
SECURITY.md information on reporting vulnerabilities
Makefile.in ) template for Unix Makefile, which is built by
) "configure"
Makefile.am ) the automake input that was used to create
) Makefile.in
NEWS important changes in this release
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
README this file
RunTest a Unix shell script for running tests
RunGrepTest a Unix shell script for pcre2grep tests
RunTest.bat a Windows batch file for running tests
RunGrepTest.bat a Windows batch file for pcre2grep tests
aclocal.m4 m4 macros (generated by "aclocal")
m4/* m4 macros (used by autoconf)
configure a configuring shell script (built by autoconf)
configure.ac ) the autoconf input that was used to build
) "configure" and config.h
doc/*.3 man page sources for PCRE2
doc/*.1 man page sources for pcre2grep and pcre2test
doc/html/* HTML documentation
doc/pcre2.txt plain text version of the man pages
doc/pcre2-config.txt plain text documentation of pcre2-config script
doc/pcre2grep.txt plain text documentation of grep utility program
doc/pcre2test.txt plain text documentation of test program
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
ar-lib )
config.guess )
config.sub )
depcomp ) helper tools generated by libtool and
compile ) automake, used internally by ./configure
install-sh )
ltmain.sh )
missing )
test-driver )
perltest.sh Script for running a Perl test program
pcre2-config.in source of script which retains PCRE2 information
testdata/testinput* test data for main library tests
testdata/testoutput* expected test results
testdata/grep* input and output for pcre2grep tests
testdata/* other supporting test files
(D) Auxiliary files for CMake support
cmake/COPYING-CMAKE-SCRIPTS
cmake/FindEditline.cmake
cmake/FindReadline.cmake
cmake/pcre2-config-version.cmake.in
cmake/pcre2-config.cmake.in
CMakeLists.txt
config-cmake.h.in
(E) Auxiliary files for building PCRE2 "by hand"
src/pcre2.h.generic ) a version of the public PCRE2 header file
) for use in non-"configure" environments
src/config.h.generic ) a version of config.h for use in non-"configure"
) environments
(F) Auxiliary files for building PCRE2 using other build systems
BUILD.bazel )
MODULE.bazel ) files used by the Bazel build system
WORKSPACE.bazel )
build.zig file used by zig's build system
(G) Auxiliary files for building PCRE2 under OpenVMS
vms/configure.com )
vms/openvms_readme.txt ) These files were contributed by a PCRE2 user.
vms/pcre2.h_patch )
vms/stdint.h )
==============================
Last updated: 18 December 2024
==============================

View File

@@ -0,0 +1,20 @@
PCRE sources used by wxWidgets
==============================
This directory contains the version of PCRE used by wxWidgets.
It was created by cloning https://github.com/rurban/pcre which is, in turn, a
Git clone of the official PCRE2 subversion repository.
Updating PCRE
-------------
Fast-forward master to the new PCRE version, then merge it into wx branch
using `git merge --no-commit`. Before committing the merge, additionally:
1. Rerun autogen.sh.
1. Copy `src/pcre2_chartables.c.dist` to `src/pcre2_chartables.c`.
1. Examine changes to `src/pcre2.h.generic` and `src/config.h.generic` and
propagate them to `src/wx/pcre2.h` and `src/wx/config.h` respectively.
Add all changed files to the commit.

View File

@@ -0,0 +1,56 @@
# PCRE2 - Perl-Compatible Regular Expressions
The PCRE2 library is a set of C functions that implement regular expression
pattern matching using the same syntax and semantics as Perl 5. PCRE2 has its
own native API, as well as a set of wrapper functions that correspond to the
POSIX regular expression API. The PCRE2 library is free, even for building
proprietary software. It comes in three forms, for processing 8-bit, 16-bit,
or 32-bit code units, in either literal or UTF encoding.
PCRE2 was first released in 2015 to replace the API in the original PCRE
library, which is now obsolete and no longer maintained. As well as a more
flexible API, the code of PCRE2 has been much improved since the fork.
## Download
As well as downloading from the
[GitHub site](https://github.com/PCRE2Project/pcre2), you can download PCRE2
or the older, unmaintained PCRE1 library from an
[*unofficial* mirror](https://sourceforge.net/projects/pcre/files/) at SourceForge.
You can check out the PCRE2 source code via Git or Subversion:
git clone https://github.com/PCRE2Project/pcre2.git
svn co https://github.com/PCRE2Project/pcre2.git
## Contributed Ports
If you just need the command-line PCRE2 tools on Windows, precompiled binary
versions are available at this
[Rexegg page](http://www.rexegg.com/pcregrep-pcretest.html).
A PCRE2 port for z/OS, a mainframe operating system which uses EBCDIC as its
default character encoding, can be found at
[http://www.cbttape.org](http://www.cbttape.org/) (File 939).
## Documentation
You can read the PCRE2 documentation
[here](https://PCRE2Project.github.io/pcre2/doc/html/index.html).
Comparisons to Perl's regular expression semantics can be found in the
community authored Wikipedia entry for PCRE.
There is a curated summary of changes for each PCRE release, copies of
documentation from older releases, and other useful information from the third
party authored
[RexEgg PCRE Documentation and Change Log page](http://www.rexegg.com/pcre-documentation.html).
## Contact
To report a problem with the PCRE2 library, or to make a feature request, please
use the PCRE2 GitHub issues tracker. There is a mailing list for discussion of
PCRE2 issues and development at pcre2-dev@googlegroups.com, which is where any
announcements will be made. You can browse the
[list archives](https://groups.google.com/g/pcre2-dev).

1157
libs/wxWidgets-3.3.1/3rdparty/pcre/RunGrepTest vendored Executable file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

947
libs/wxWidgets-3.3.1/3rdparty/pcre/RunTest vendored Executable file
View File

@@ -0,0 +1,947 @@
#! /bin/sh
###############################################################################
# Run the PCRE2 tests using the pcre2test program. The appropriate tests are
# selected, depending on which build-time options were used.
#
# When JIT support is available, all appropriate tests are run with and without
# JIT, unless "-nojit" is given on the command line. There are also two tests
# for JIT-specific features, one to be run when JIT support is available
# (unless "-nojit" is specified), and one when it is not.
#
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
# possible to select which to test by giving "-8", "-16" or "-32" on the
# command line.
#
# As well as "-nojit", "-8", "-16", and "-32", arguments for this script are
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
# except test 10. Whatever order the arguments are in, these tests are always
# run in numerical order.
#
# If no specific tests are selected (which is the case when this script is run
# via 'make check') the default is to run all the numbered tests.
#
# There may also be named (as well as numbered) tests for special purposes. At
# present there is just one, called "heap". This test's output contains the
# sizes of heap frames and frame vectors, which depend on the environment. It
# is therefore not run unless explicitly requested.
#
# Inappropriate tests are automatically skipped (with a comment to say so). For
# example, if JIT support is not compiled, test 17 is skipped, whereas if JIT
# support is compiled, test 16 is skipped.
#
# Other arguments can be one of the words "-valgrind", "-valgrind-log", or
# "-sim" followed by an argument to run cross-compiled executables under a
# simulator, for example:
#
# RunTest 3 -sim "qemu-arm -s 8388608"
#
# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may
# be given without the leading "-" character.
#
# When PCRE2 is compiled by clang with -fsanitize arguments, some tests need
# very much more stack than normal. In environments where the stack can be
# set at runtime, -bigstack sets a gigantic stack.
#
# There are two special cases where only one argument is allowed:
#
# If the first and only argument is "ebcdic", the script runs the special
# EBCDIC test that can be useful for checking certain EBCDIC features, even
# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for
# this test to be run.
#
# If the script is obeyed as "RunTest list", a list of available tests is
# output, but none of them are run.
###############################################################################
# Define test titles in variables so that they can be output as a list. Some
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
#
# The titles for tests 4, 5 and 7 are split into an "A" and a "B" part. The
# "list" option prints the two parts joined together; presumably the split
# lets the tests print only the first part in some configurations - TODO
# confirm against the test code.
title0="Test 0: Unchecked pcre2test argument tests (to improve coverage)"
title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)"
title2="Test 2: API, errors, internals and non-Perl stuff"
title3="Test 3: Locale-specific features"
title4A="Test 4: UTF"
title4B=" and Unicode property support (compatible with Perl >= 5.10)"
title5A="Test 5: API, internals, and non-Perl stuff for UTF"
title5B=" and UCP support"
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
title7A="Test 7: DFA matching with UTF"
title7B=" and Unicode property support"
title8="Test 8: Internal offsets and code size tests"
title9="Test 9: Specials for the basic 8-bit library"
title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
title14="Test 14: DFA specials for UTF and UCP support"
title15="Test 15: Non-JIT limits and other non-JIT tests"
title16="Test 16: JIT-specific features when JIT is not available"
title17="Test 17: JIT-specific features when JIT is available"
title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
title19="Test 19: Tests of the POSIX interface with UTF/UCP"
title20="Test 20: Serialization and code copy tests"
title21="Test 21: \C tests without UTF (supported for DFA matching)"
title22="Test 22: \C tests with UTF (not supported for DFA matching)"
title23="Test 23: \C disabled test"
title24="Test 24: Non-UTF pattern conversion tests"
title25="Test 25: UTF pattern conversion tests"
title26="Test 26: Unicode property tests (compatible with Perl >= 5.38)"
title27="Test 27: Auto-generated unicode property tests"
# Highest test number. The option parser uses it as the implied upper bound of
# an open-ended range such as "3-" and to reject ranges that run past it.
maxtest=27
# Title of the named (non-numbered) "heap" test.
titleheap="Test 'heap': Environment-specific heap tests"
# "RunTest list": print the title of every known test and stop without running
# anything. The titles are deliberately expanded unquoted, so the shell splits
# them into words and echo joins the words with single spaces.
if [ $# -eq 1 ] && [ "$1" = "list" ]; then
  for heading in "$title0" "$title1"
  do
    echo $heading
  done
  echo $title2 "(not UTF or UCP)"
  # An empty entry produces a blank separator line.
  for heading in \
    "$title3" "$title4A $title4B" "$title5A $title5B" "$title6" \
    "$title7A $title7B" "$title8" "$title9" "$title10" "$title11" \
    "$title12" "$title13" "$title14" "$title15" "$title16" "$title17" \
    "$title18" "$title19" "$title20" "$title21" "$title22" "$title23" \
    "$title24" "$title25" "$title26" "$title27" \
    "" "$titleheap" ""
  do
    echo $heading
  done
  echo "Numbered tests are automatically run if nothing selected."
  echo "Named tests must be explicitly selected."
  exit 0
fi
# Choose the command used to compare actual and expected output. Unified
# diffs are preferred, but some systems have a diff without -u, so probe for
# it first and fall back to plain diff.
if diff -u /dev/null /dev/null 2>/dev/null ; then
  cf="diff -u"
else
  cf="diff"
fi
# Locate the test data directory. If $srcdir names an existing directory, its
# testdata subdirectory is used without further checks; otherwise ./testdata
# and then ../testdata are tried. Give up if none of these applies.
testdata=
if [ -n "$srcdir" ] && [ -d "$srcdir" ] ; then
  testdata="$srcdir/testdata"
else
  for candidate in ./testdata ../testdata
  do
    if [ -d "$candidate" ] ; then
      testdata=$candidate
      break
    fi
  done
fi
if [ -z "$testdata" ] ; then
  echo "Cannot find the testdata directory"
  exit 1
fi
# ------ Function to check results of a test -------
# Compares the file "testtry" with an expected-output file and reports the
# outcome; any failure terminates the whole script with status 1.
#
# Arguments:
#   $1  exit status ($?) of the pcre2test run that produced testtry
#   $2  suffix selecting the expected file, $testdata/testoutput$2
#   $3  the $opt value used for the run (empty, -jit, or -dfa); it only
#       affects the wording of the message
#
# Note: must define using name(), not "function name", for Solaris.
checkresult()
{
  if [ $1 -ne 0 ] ; then
    echo "** pcre2test failed - check testtry"
    exit 1
  fi
  # Work out the message suffix before comparing, as both the success and the
  # failure message use it.
  if [ "x$3" = "x-jit" ] ; then
    with=" with JIT"
  elif [ "x$3" = "x-dfa" ] ; then
    with=" with DFA"
  else
    with=""
  fi
  # $cf is left unquoted on purpose: it may hold "diff -u".
  if $cf $testdata/testoutput$2 testtry ; then
    echo " OK$with"
  else
    echo ""
    echo "** Test $2 failed$with"
    exit 1
  fi
}
# ------ Function to run and check a special pcre2test arguments test -------
# Runs pcre2test (under the simulator and/or valgrind when those are set) with
# the arguments given in $1, appending its standard output to testtry. $1 is
# expanded unquoted so that it may carry several arguments. A non-zero exit
# status from the run terminates the script with status 1.
checkspecial()
{
  $sim $valgrind $vjs $pcre2test $1 >>testtry || {
    echo "** pcre2test $1 failed - check testtry"
    exit 1
  }
}
# ------ Test setup ------
# Every option variable starts out empty, so that nothing inherited from the
# environment can affect the run.
arg8= arg16= arg32=
nojit= bigstack=
sim= skip=
valgrind= vjs=
# The program under test may be overridden by setting pcre2test in the
# environment; an unset or empty value selects the one in the current directory.
if [ -z "$pcre2test" ] ; then
  pcre2test=./pcre2test
fi
# This is in case the caller has set aliases (as I do - PH)
unset cp ls mv rm
# Refuse to go on without something to run.
test ! -x $pcre2test && {
  echo "** $pcre2test does not exist or is not executable."
  exit 1
}
# Process options and select which tests to run; for those that are explicitly
# requested, check that the necessary optional facilities are available.
#
# Each numbered test N has a selection flag doN, and the two special tests
# have doheap and doebcdic. All of them start out as "no".
do0=no  do1=no  do2=no  do3=no  do4=no  do5=no  do6=no
do7=no  do8=no  do9=no  do10=no do11=no do12=no do13=no
do14=no do15=no do16=no do17=no do18=no do19=no do20=no
do21=no do22=no do23=no do24=no do25=no do26=no do27=no
doheap=no doebcdic=no
# Walk the command line, one argument per iteration. An argument is either a
# test selector (a test number, "heap", "ebcdic", a range such as 3-6 or 3-,
# or an exclusion such as ~10) or an option. Selectors set the matching doN
# variable to "yes"; exclusions are only collected in $skip here and are
# applied later, once the default selection has been made. Anything that is
# not recognized terminates the script with status 1.
while [ $# -gt 0 ] ; do
  case $1 in
    # A single test number, 0 to 27 (keep in step with $maxtest).
    [0-9]|1[0-9]|2[0-7]) eval do$1=yes;;
    heap) doheap=yes;;
    ebcdic) doebcdic=yes;;
    -8) arg8=yes;;
    -16) arg16=yes;;
    -32) arg32=yes;;
    bigstack|-bigstack) bigstack=yes;;
    nojit|-nojit) nojit=yes;;
    sim|-sim)
      # The simulator command is the next argument. Diagnose its absence
      # here: otherwise the "shift" at the bottom of the loop runs with
      # nothing left to shift, which either aborts the script with an obscure
      # shell error or silently ignores the option, depending on the shell.
      if [ $# -lt 2 ] ; then
        echo "Option '$1' requires an argument"; exit 1
      fi
      shift; sim=$1;;
    valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --error-exitcode=70";;
    valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";;
    ~*)
      # ~N excludes test N.
      if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
        skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
      else
        echo "Unknown option or test selector '$1'"; exit 1
      fi
      ;;
    *-*)
      # A range: "from-to", or "from-" meaning up to the last test.
      if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
        tf=`expr "$1" : '\([0-9]*\)'`
        tt=`expr "$1" : '.*-\([0-9]*\)'`
        if [ "$tt" = "" ] ; then tt=$maxtest; fi
        # Reject a range that runs past the last test, and also one that
        # selects nothing because it starts beyond its end. The latter would
        # otherwise be silently ignored, and if it were the only selector the
        # whole suite would be run.
        if expr \( "$tt" ">" "$maxtest" \) >/dev/null ||
           expr \( "$tf" ">" "$tt" \) >/dev/null; then
          echo "Invalid test range '$1'"; exit 1
        fi
        while expr "$tf" "<=" "$tt" >/dev/null; do
          eval do${tf}=yes
          tf=`expr $tf + 1`
        done
      else
        echo "Invalid test range '$1'"; exit 1
      fi
      ;;
    *) echo "Unknown option or test selector '$1'"; exit 1;;
  esac
  shift
done
# Find which optional facilities are available. pcre2test reports the answer
# to a "-C <option>" query through its exit status.
#
# The internal link size comes first; only the values 2, 3 and 4 are accepted.
$sim $pcre2test -C linksize >/dev/null
link_size=$?
if [ $link_size -lt 2 ] || [ $link_size -gt 4 ] ; then
  echo "RunTest: Failed to find internal link size"
  exit 1
fi
# Probe whether pcre2test can set the system stack size. A large stack is set
# up only when that works and -bigstack was given.
$sim $pcre2test -S 32 /dev/null /dev/null
support_setstack=$?
setstack=""
if [ $support_setstack -eq 0 ] && [ "$bigstack" != "" ] ; then
  setstack="-S 32"
fi
# Any of the 8-bit, 16-bit and 32-bit libraries may have been built, but only
# one need be. Each supportN ends up non-zero when that library is present.
$sim $pcre2test -C pcre2-8 >/dev/null;  support8=$?
$sim $pcre2test -C pcre2-16 >/dev/null; support16=$?
$sim $pcre2test -C pcre2-32 >/dev/null; support32=$?
# \C may be disabled
$sim $pcre2test -C backslash-C >/dev/null; supportBSC=$?
# Start with every code unit width marked as skipped.
test8=skip test16=skip test32=skip
# Decide which code unit widths to test. testN is changed from "skip" to the
# matching pcre2test option (-8, -16 or -32) for each width that is selected.
case "$arg8$arg16$arg32" in
  "")
    # No width was asked for: take every library that has been built.
    [ $support8 -ne 0 ] && test8=-8
    [ $support16 -ne 0 ] && test16=-16
    [ $support32 -ne 0 ] && test32=-32
    ;;
  *)
    # Specific widths were asked for: asking for a library that has not been
    # built is an error.
    if [ "$arg8" = yes ] ; then
      [ $support8 -eq 0 ] && {
        echo "Cannot run 8-bit library tests: 8-bit library not compiled"
        exit 1
      }
      test8=-8
    fi
    if [ "$arg16" = yes ] ; then
      [ $support16 -eq 0 ] && {
        echo "Cannot run 16-bit library tests: 16-bit library not compiled"
        exit 1
      }
      test16=-16
    fi
    if [ "$arg32" = yes ] ; then
      [ $support32 -eq 0 ] && {
        echo "Cannot run 32-bit library tests: 32-bit library not compiled"
        exit 1
      }
      test32=-32
    fi
    ;;
esac
# UTF support comes with Unicode support and applies to every code unit width
# that is built: there cannot be UTF-8 support without UTF-16 or UTF-32
# support.
$sim $pcre2test -C unicode >/dev/null
utf=$?
# Enable the JIT test runs when JIT is compiled in and -nojit was not given.
# When JIT is used with valgrind, a suppressions file is needed as well,
# because otherwise there are a lot of false positive valgrind reports when
# the hardware supports SSE2.
jitopt=
$sim $pcre2test -C jit >/dev/null
jit=$?
if [ $jit -ne 0 ] && [ "$nojit" != "yes" ] ; then
  jitopt=-jit
  if [ -n "$valgrind" ] ; then
    vjs="--suppressions=$testdata/valgrind-jit.supp"
  fi
fi
# When the command line selected no test at all (every doN flag, doheap and
# doebcdic are all still "no"), switch on each of the numbered tests 0-27.
# The heap and EBCDIC tests are left off. Numbered tests that are not
# relevant to this build are skipped individually when they come up.
if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
     $do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \
     $do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
     $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
     $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
     $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
     $do24 = no -a $do25 = no -a $do26 = no -a $do27 = no -a \
     $doheap = no -a $doebcdic = no \
   ]; then
  for selnum in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 \
                23 24 25 26 27; do
    eval do$selnum=yes
  done
fi
# Handle any explicit skips at this stage, so that an argument list may consist
# only of explicit skips.
for i in $skip; do eval do$i=no; done
# Show which release and which test data
echo ""
echo PCRE2 C library tests using test data from $testdata
$sim $pcre2test /dev/null
echo ""
# ------ Normal Tests ------
for bmode in "$test8" "$test16" "$test32"; do
case "$bmode" in
skip) continue;;
-16) if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
bits=16; echo "---- Testing 16-bit library ----"; echo "";;
-32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
bits=32; echo "---- Testing 32-bit library ----"; echo "";;
-8) bits=8; echo "---- Testing 8-bit library ----"; echo "";;
esac
# Test 0 is a special test. Its output is not checked, because it will
# be different on different hardware and with different configurations.
# Running this test just exercises the code.
if [ $do0 = yes ] ; then
echo $title0
echo '/abc/jit,memory,framesize' >testSinput
echo ' abc' >>testSinput
echo '' >testtry
checkspecial '-C'
checkspecial '--help'
if [ $support_setstack -eq 0 ] ; then
checkspecial '-S 1 -t 10 testSinput'
fi
echo " OK"
fi
# Primary non-UTF test, compatible with JIT and all versions of Perl >= 5.8
if [ $do1 = yes ] ; then
echo $title1
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry
checkresult $? 1 "$opt"
done
fi
# PCRE2 tests that are not Perl-compatible: API, errors, internals. The
# testbtables file is copied into the current directory for use by this test.
# When the main run succeeds, a second pcre2test run appends the output of
# "-error" for a fixed list of error numbers to testtry, and it is the status
# of that second run that is handed to checkresult. When the main run fails,
# its own status is handed over instead.
if [ $do2 = yes ] ; then
  echo $title2 "(excluding UTF-$bits)"
  cp $testdata/testbtables .
  for opt in "" $jitopt; do
    $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
    saverc=$?
    if [ $saverc = 0 ] ; then
      $sim $valgrind ${opt:+$vjs} $pcre2test -q $bmode $opt -error -80,-62,-2,-1,0,100,101,191,300 >>testtry
      saverc=$?
    fi
    checkresult $saverc 2 "$opt"
  done
fi
# Locale-specific tests, provided that either the "fr_FR", "fr_CA", "french"
# or "fr" locale is available. The first two are Unix-like standards; the
# last two are for Windows. Unfortunately, different versions of the French
# locale give different outputs for some items. This test passes if the
# output matches any one of the alternative output files.
if [ $do3 = yes ] ; then
locale=
# In some environments locales that are listed by the "locale -a"
# command do not seem to work with setlocale(). Therefore, we do
# a preliminary test to see if pcre2test can set one before going
# on to use it.
for loc in 'fr_FR' 'french' 'fr' 'fr_CA'; do
locale -a | grep "^$loc\$" >/dev/null
if [ $? -eq 0 ] ; then
echo "/a/locale=$loc" | \
$sim $valgrind $pcre2test -q $bmode | \
grep "Failed to set locale" >/dev/null
if [ $? -ne 0 ] ; then
locale=$loc
if [ "$locale" = "fr_FR" ] ; then
infile=$testdata/testinput3
outfile=$testdata/testoutput3
outfile2=$testdata/testoutput3A
outfile3=$testdata/testoutput3B
else
infile=test3input
outfile=test3output
outfile2=test3outputA
outfile3=test3outputB
sed "s/fr_FR/$loc/" $testdata/testinput3 >test3input
sed "s/fr_FR/$loc/" $testdata/testoutput3 >test3output
sed "s/fr_FR/$loc/" $testdata/testoutput3A >test3outputA
sed "s/fr_FR/$loc/" $testdata/testoutput3B >test3outputB
fi
break
fi
fi
done
if [ "$locale" != "" ] ; then
echo $title3 "(using '$locale' locale)"
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $infile testtry
if [ $? = 0 ] ; then
case "$opt" in
-jit) with=" with JIT";;
*) with="";;
esac
if $cf $outfile testtry >teststdout || \
$cf $outfile2 testtry >teststdout || \
$cf $outfile3 testtry >teststdout
then
echo " OK$with"
else
echo "** Locale test did not run successfully$with. The output did not match"
echo " $outfile, $outfile2 or $outfile3."
echo " This may mean that there is a problem with the locale settings rather"
echo " than a bug in PCRE2."
exit 1
fi
else exit 1
fi
done
else
echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr_CA',"
echo "'fr' or 'french' locales can be set, or the \"locale\" command is"
echo "not available to check for them."
echo " "
fi
fi
# Tests for UTF and Unicode property support
if [ $do4 = yes ] ; then
echo ${title4A}-${bits}${title4B}
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry
checkresult $? 4 "$opt"
done
fi
fi
if [ $do5 = yes ] ; then
echo ${title5A}-${bits}$title5B
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry
checkresult $? 5 "$opt"
done
fi
fi
# Tests for DFA matching support
if [ $do6 = yes ] ; then
echo $title6
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput6 testtry
checkresult $? 6 ""
fi
# DFA matching with UTF and Unicode property support. This test is run once
# only; there is no "for opt" loop here, so no per-run option is passed to
# pcre2test. (Passing $opt at this point would pick up whatever value an
# earlier test's loop happened to leave behind, e.g. -jit or -dfa.)
if [ $do7 = yes ] ; then
  echo ${title7A}-${bits}$title7B
  if [ $utf -eq 0 ] ; then
    echo " Skipped because UTF-$bits support is not available"
  else
    $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput7 testtry
    checkresult $? 7 ""
  fi
fi
# Test of internal offsets and code sizes. This test is run only when there
# is UTF/UCP support. The actual tests are mostly the same as in some of the
# above, but in this test we inspect some offsets and sizes. This is a
# doublecheck for the maintainer, just in case something changes unexpectedly.
# The output from this test is different in 8-bit, 16-bit, and 32-bit modes
# and for different link sizes, so there are different output files for each
# mode and link size.
if [ $do8 = yes ] ; then
echo $title8
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput8 testtry
checkresult $? 8-$bits-$link_size ""
fi
fi
# Tests for 8-bit-specific features
if [ "$do9" = yes ] ; then
echo $title9
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry
checkresult $? 9 "$opt"
done
fi
fi
# Tests for UTF-8 and UCP 8-bit-specific features
if [ "$do10" = yes ] ; then
echo $title10
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry
checkresult $? 10 "$opt"
done
fi
fi
# Tests for 16-bit and 32-bit features. Output is different for the two widths.
if [ $do11 = yes ] ; then
echo $title11
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry
checkresult $? 11-$bits "$opt"
done
fi
fi
# Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output
# is different for the two widths.
if [ $do12 = yes ] ; then
echo $title12
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry
checkresult $? 12-$bits "$opt"
done
fi
fi
# Tests for 16/32-bit-specific features in DFA non-UTF modes
if [ $do13 = yes ] ; then
echo $title13
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
else
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput13 testtry
checkresult $? 13 ""
fi
fi
# Tests for DFA UTF and UCP features. Output is different for the different widths.
# This test is run once only; there is no "for opt" loop here, so no per-run
# option is passed to pcre2test. (Passing $opt at this point would pick up
# whatever value an earlier test's loop happened to leave behind.)
if [ $do14 = yes ] ; then
  echo $title14
  if [ $utf -eq 0 ] ; then
    echo " Skipped because UTF-$bits support is not available"
  else
    $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput14 testtry
    checkresult $? 14-$bits ""
  fi
fi
# Test non-JIT match and recursion limits
if [ $do15 = yes ] ; then
echo $title15
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput15 testtry
checkresult $? 15 ""
fi
# Test JIT-specific features when JIT is not available
if [ $do16 = yes ] ; then
echo $title16
if [ $jit -ne 0 ] ; then
echo " Skipped because JIT is available"
else
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput16 testtry
checkresult $? 16 ""
fi
fi
# Test JIT-specific features when JIT is available
if [ $do17 = yes ] ; then
echo $title17
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
echo " Skipped because JIT is not available or nojit was specified"
else
$sim $valgrind $vjs $pcre2test -q $setstack $bmode $testdata/testinput17 testtry
checkresult $? 17 ""
fi
fi
# Tests for the POSIX interface without UTF/UCP (8-bit only)
if [ $do18 = yes ] ; then
echo $title18
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
else
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput18 testtry
checkresult $? 18 ""
fi
fi
# Tests for the POSIX interface with UTF/UCP (8-bit only)
if [ $do19 = yes ] ; then
echo $title19
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput19 testtry
checkresult $? 19 ""
fi
fi
# Serialization tests
if [ $do20 = yes ] ; then
echo $title20
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput20 testtry
checkresult $? 20 ""
fi
# \C tests without UTF - DFA matching is supported
if [ "$do21" = yes ] ; then
echo $title21
if [ $supportBSC -eq 0 ] ; then
echo " Skipped because \C is disabled"
else
for opt in "" $jitopt -dfa; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry
checkresult $? 21 "$opt"
done
fi
fi
# \C tests with UTF - DFA matching is not supported for \C in UTF mode
if [ "$do22" = yes ] ; then
echo $title22
if [ $supportBSC -eq 0 ] ; then
echo " Skipped because \C is disabled"
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry
checkresult $? 22-$bits "$opt"
done
fi
fi
# Test when \C is disabled
if [ "$do23" = yes ] ; then
echo $title23
if [ $supportBSC -ne 0 ] ; then
echo " Skipped because \C is not disabled"
else
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput23 testtry
checkresult $? 23 ""
fi
fi
# Non-UTF pattern conversion tests
if [ "$do24" = yes ] ; then
echo $title24
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput24 testtry
checkresult $? 24 ""
fi
# UTF pattern conversion tests
if [ "$do25" = yes ] ; then
echo $title25
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput25 testtry
checkresult $? 25 ""
fi
fi
# Unicode property tests
if [ $do26 = yes ] ; then
echo $title26
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
checkresult $? 26 "$opt"
done
fi
fi
# Auto-generated Unicode property tests
if [ $do27 = yes ] ; then
echo $title27
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput27 testtry
checkresult $? 27 "$opt"
done
fi
fi
# Manually selected heap tests - output may vary in different environments,
# which is why they are not automatically run.
# The result is checked under the name heap-$bits, i.e. separately for each
# code unit width.
if [ $doheap = yes ] ; then
echo $titleheap
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinputheap testtry
checkresult $? heap-$bits ""
fi
# End of loop for 8/16/32-bit tests
done
# ------ Special EBCDIC Test -------
# Runs only when doebcdic is "yes", and outside the 8/16/32-bit loop, so no
# width option is passed. "pcre2test -C ebcdic" must exit with status 1 or the
# script stops with an error. The test input is then run twice, plain and with
# -dfa, capturing standard output in testtry before each result is checked.
if [ $doebcdic = yes ] ; then
$sim $valgrind $pcre2test -C ebcdic >/dev/null
ebcdic=$?
if [ $ebcdic -ne 1 ] ; then
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
exit 1
fi
for opt in "" "-dfa"; do
$sim $valgrind $pcre2test -q $opt $testdata/testinputEBC >testtry
checkresult $? EBC "$opt"
done
fi
# Clean up local working files
rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry
# End

View File

@@ -0,0 +1,564 @@
@echo off
@rem
@rem MS Windows batch file to run pcre2test on testfiles with the correct
@rem options. This file must use CRLF linebreaks to function properly,
@rem and requires both pcre2test and pcre2grep.
@rem
@rem ------------------------ HISTORY ----------------------------------
@rem This file was originally contributed to PCRE1 by Ralf Junker, and touched
@rem up by Daniel Richard G. Tests 10-12 added by Philip H.
@rem Philip H also changed test 3 to use "wintest" files.
@rem
@rem Updated by Tom Fortmann to support explicit test numbers on the command
@rem line. Added argument validation and added error reporting.
@rem
@rem Sheri Pierce added logic to skip feature dependent tests
@rem tests 4 5 7 10 12 14 19 22 25 and 26 require Unicode support
@rem 8 requires Unicode and link size 2
@rem 16 requires absence of jit support
@rem 17 requires presence of jit support
@rem Sheri P also added override tests for study and jit testing
@rem Zoltan Herczeg added libpcre16 support
@rem Zoltan Herczeg added libpcre32 support
@rem -------------------------------------------------------------------
@rem
@rem The file was converted for PCRE2 by PH, February 2015.
@rem Updated for new test 14 (moving others up a number), August 2015.
@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.
@rem PH added missing "set type" for test 22, April 2016.
@rem PH added copy command for new testbtables file, November 2020
@rem PH caused it to show comparison output when comparison failed, July 2023
@rem PH updated unknown error number in test
setlocal enabledelayedexpansion
@rem Locate the source tree. Unless srcdir is already set, try the current
@rem directory, then its parent, then its grandparent, and take the first
@rem one that contains a testdata folder.
if [%srcdir%]==[] (
if exist testdata\ set srcdir=.)
if [%srcdir%]==[] (
if exist ..\testdata\ set srcdir=..)
if [%srcdir%]==[] (
if exist ..\..\testdata\ set srcdir=..\..)
@rem Without a testdata folder nothing can be tested: print the configuration
@rem hints and leave with exit code 1.
if NOT exist %srcdir%\testdata\ (
echo Error: distribution testdata folder not found!
call :conferror
exit /b 1
goto :eof
)
if [%pcre2test%]==[] set pcre2test=.\pcre2test.exe
echo source dir is %srcdir%
echo pcre2test=%pcre2test%
if NOT exist %pcre2test% (
echo Error: %pcre2test% not found!
echo.
call :conferror
exit /b 1
)
%pcre2test% -C linksize >NUL
set link_size=%ERRORLEVEL%
%pcre2test% -C pcre2-8 >NUL
set support8=%ERRORLEVEL%
%pcre2test% -C pcre2-16 >NUL
set support16=%ERRORLEVEL%
%pcre2test% -C pcre2-32 >NUL
set support32=%ERRORLEVEL%
%pcre2test% -C unicode >NUL
set unicode=%ERRORLEVEL%
%pcre2test% -C jit >NUL
set jit=%ERRORLEVEL%
%pcre2test% -C backslash-C >NUL
set supportBSC=%ERRORLEVEL%
if %support8% EQU 1 (
if not exist testout8 md testout8
if not exist testoutjit8 md testoutjit8
)
if %support16% EQU 1 (
if not exist testout16 md testout16
if not exist testoutjit16 md testoutjit16
)
if %support32% EQU 1 (
if not exist testout32 md testout32
if not exist testoutjit32 md testoutjit32
)
set do1=no
set do2=no
set do3=no
set do4=no
set do5=no
set do6=no
set do7=no
set do8=no
set do9=no
set do10=no
set do11=no
set do12=no
set do13=no
set do14=no
set do15=no
set do16=no
set do17=no
set do18=no
set do19=no
set do20=no
set do21=no
set do22=no
set do23=no
set do24=no
set do25=no
set do26=no
set do27=no
set all=yes
for %%a in (%*) do (
set valid=no
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27) do if %%v == %%a set valid=yes
if "!valid!" == "yes" (
set do%%a=yes
set all=no
) else (
echo Invalid test number - %%a!
echo Usage %0 [ test_number ] ...
echo Where test_number is one or more optional test numbers 1 through 27, default is all tests.
exit /b 1
)
)
set failed="no"
if "%all%" == "yes" (
set do1=yes
set do2=yes
set do3=yes
set do4=yes
set do5=yes
set do6=yes
set do7=yes
set do8=yes
set do9=yes
set do10=yes
set do11=yes
set do12=yes
set do13=yes
set do14=yes
set do15=yes
set do16=yes
set do17=yes
set do18=yes
set do19=yes
set do20=yes
set do21=yes
set do22=yes
set do23=yes
set do24=yes
set do25=yes
set do26=yes
set do27=yes
)
@echo RunTest.bat's pcre2test output is written to newly created subfolders
@echo named testout{8,16,32} and testoutjit{8,16,32}.
@echo.
set mode=
set bits=8
:nextMode
if "%mode%" == "" (
if %support8% EQU 0 goto modeSkip
echo.
echo ---- Testing 8-bit library ----
echo.
)
if "%mode%" == "-16" (
if %support16% EQU 0 goto modeSkip
echo.
echo ---- Testing 16-bit library ----
echo.
)
if "%mode%" == "-32" (
if %support32% EQU 0 goto modeSkip
echo.
echo ---- Testing 32-bit library ----
echo.
)
if "%do1%" == "yes" call :do1
if "%do2%" == "yes" call :do2
if "%do3%" == "yes" call :do3
if "%do4%" == "yes" call :do4
if "%do5%" == "yes" call :do5
if "%do6%" == "yes" call :do6
if "%do7%" == "yes" call :do7
if "%do8%" == "yes" call :do8
if "%do9%" == "yes" call :do9
if "%do10%" == "yes" call :do10
if "%do11%" == "yes" call :do11
if "%do12%" == "yes" call :do12
if "%do13%" == "yes" call :do13
if "%do14%" == "yes" call :do14
if "%do15%" == "yes" call :do15
if "%do16%" == "yes" call :do16
if "%do17%" == "yes" call :do17
if "%do18%" == "yes" call :do18
if "%do19%" == "yes" call :do19
if "%do20%" == "yes" call :do20
if "%do21%" == "yes" call :do21
if "%do22%" == "yes" call :do22
if "%do23%" == "yes" call :do23
if "%do24%" == "yes" call :do24
if "%do25%" == "yes" call :do25
if "%do26%" == "yes" call :do26
if "%do27%" == "yes" call :do27
:modeSkip
if "%mode%" == "" (
set mode=-16
set bits=16
goto nextMode
)
if "%mode%" == "-16" (
set mode=-32
set bits=32
goto nextMode
)
@rem If mode is -32, testing is finished
if %failed% == "yes" (
echo In above output, one or more of the various tests failed!
exit /b 1
)
echo All OK
goto :eof
:runsub
@rem Function to execute pcre2test and compare the output
@rem Arguments are as follows:
@rem
@rem 1 = test number
@rem 2 = outputdir
@rem 3 = test name use double quotes
@rem 4 - 9 = pcre2test options
@rem
@rem The output file is written into the folder named by outputdir followed
@rem by the current bits value, and is then compared, using fc, with the file
@rem of the same name in the testdata folder. The variable failed is set to
@rem "yes" when pcre2test itself fails or when the comparison fails, except
@rem for a comparison failure of test 3, which only prints a locale hint.
if [%1] == [] (
echo Missing test number argument!
exit /b 1
)
if [%2] == [] (
echo Missing outputdir!
exit /b 1
)
if [%3] == [] (
echo Missing test name argument!
exit /b 1
)
@rem Tests 11, 12, 14 and 22 have one expected output file per code unit
@rem width; test 8 has one per code unit width and link size.
if %1 == 8 (
set outnum=%1-%bits%-%link_size%
) else if %1 == 11 (
set outnum=%1-%bits%
) else if %1 == 12 (
set outnum=%1-%bits%
) else if %1 == 14 (
set outnum=%1-%bits%
) else if %1 == 22 (
set outnum=%1-%bits%
) else (
set outnum=%1
)
set testinput=testinput%1
set testoutput=testoutput%outnum%
@rem When testdata holds a win-prefixed input file for this test, use the
@rem win-prefixed input and output names instead.
if exist %srcdir%\testdata\win%testinput% (
set testinput=wintestinput%1
set testoutput=wintestoutput%outnum%
)
echo Test %1: %3
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% >%2%bits%\%testoutput%
@rem For test 2 only, a successful run is followed by a second run that
@rem appends the texts for a fixed list of error numbers to the same file.
if errorlevel 1 (
echo. failed executing command-line:
echo. %pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput%
set failed="yes"
goto :eof
) else if [%1]==[2] (
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -80,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput%
)
@rem The first fc run is silent and only decides pass or fail; on failure fc
@rem is run again with its output shown so the differences can be read.
fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% >NUL
if errorlevel 1 (
echo. failed comparison: fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput%
if [%1]==[3] (
echo.
echo ** Test 3 failure usually means french locale is not
echo ** available on the system, rather than a bug or problem with PCRE2.
echo.
goto :eof
)
fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput%
set failed="yes"
goto :eof
)
echo. Passed.
goto :eof
:do1
call :runsub 1 testout "Main non-UTF, non-UCP functionality (Compatible with Perl >= 5.10)" -q
if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do2
copy /y %srcdir%\testdata\testbtables testbtables
call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do3
call :runsub 3 testout "Locale-specific features" -q
if %jit% EQU 1 call :runsub 3 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do4
if %unicode% EQU 0 (
echo Test 4 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 4 testout "UTF-%bits% and Unicode property support - (Compatible with Perl >= 5.10)" -q
if %jit% EQU 1 call :runsub 4 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do5
if %unicode% EQU 0 (
echo Test 5 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 5 testout "API, internals, and non-Perl stuff for UTF-%bits% and UCP" -q
if %jit% EQU 1 call :runsub 5 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do6
call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q
goto :eof
:do7
if %unicode% EQU 0 (
echo Test 7 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q
goto :eof
:do8
if NOT %link_size% EQU 2 (
echo Test 8 Skipped because link size is not 2.
goto :eof
)
if %unicode% EQU 0 (
echo Test 8 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 8 testout "Internal offsets and code size tests" -q
goto :eof
:do9
if NOT %bits% EQU 8 (
echo Test 9 Skipped when running 16/32-bit tests.
goto :eof
)
call :runsub 9 testout "Specials for the basic 8-bit library" -q
if %jit% EQU 1 call :runsub 9 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do10
if NOT %bits% EQU 8 (
echo Test 10 Skipped when running 16/32-bit tests.
goto :eof
)
if %unicode% EQU 0 (
echo Test 10 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 10 testout "Specials for the 8-bit library with Unicode support" -q
if %jit% EQU 1 call :runsub 10 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do11
if %bits% EQU 8 (
echo Test 11 Skipped when running 8-bit tests.
goto :eof
)
call :runsub 11 testout "Specials for the basic 16/32-bit library" -q
if %jit% EQU 1 call :runsub 11 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do12
if %bits% EQU 8 (
echo Test 12 Skipped when running 8-bit tests.
goto :eof
)
if %unicode% EQU 0 (
echo Test 12 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 12 testout "Specials for the 16/32-bit library with Unicode support" -q
if %jit% EQU 1 call :runsub 12 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do13
if %bits% EQU 8 (
echo Test 13 Skipped when running 8-bit tests.
goto :eof
)
call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q
goto :eof
:do14
if %unicode% EQU 0 (
echo Test 14 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 14 testout "DFA specials for UTF and UCP support" -q
goto :eof
:do15
call :runsub 15 testout "Non-JIT limits and other non_JIT tests" -q
goto :eof
:do16
if %jit% EQU 1 (
echo Test 16 Skipped due to presence of JIT support.
goto :eof
)
call :runsub 16 testout "JIT-specific features when JIT is not available" -q
goto :eof
:do17
if %jit% EQU 0 (
echo Test 17 Skipped due to absence of JIT support.
goto :eof
)
call :runsub 17 testout "JIT-specific features when JIT is available" -q
goto :eof
:do18
if %bits% EQU 16 (
echo Test 18 Skipped when running 16-bit tests.
goto :eof
)
if %bits% EQU 32 (
echo Test 18 Skipped when running 32-bit tests.
goto :eof
)
call :runsub 18 testout "POSIX interface, excluding UTF-8 and UCP" -q
goto :eof
:do19
if %bits% EQU 16 (
echo Test 19 Skipped when running 16-bit tests.
goto :eof
)
if %bits% EQU 32 (
echo Test 19 Skipped when running 32-bit tests.
goto :eof
)
if %unicode% EQU 0 (
echo Test 19 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 19 testout "POSIX interface with UTF-8 and UCP" -q
goto :eof
:do20
call :runsub 20 testout "Serialization tests" -q
goto :eof
:do21
if %supportBSC% EQU 0 (
echo Test 21 Skipped due to absence of backslash-C support.
goto :eof
)
call :runsub 21 testout "Backslash-C tests without UTF" -q
call :runsub 21 testout "Backslash-C tests without UTF (DFA)" -q -dfa
if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do22
if %supportBSC% EQU 0 (
echo Test 22 Skipped due to absence of backslash-C support.
goto :eof
)
if %unicode% EQU 0 (
echo Test 22 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 22 testout "Backslash-C tests with UTF" -q
if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do23
if %supportBSC% EQU 1 (
echo Test 23 Skipped due to presence of backslash-C support.
goto :eof
)
call :runsub 23 testout "Backslash-C disabled test" -q
goto :eof
:do24
call :runsub 24 testout "Non-UTF pattern conversion tests" -q
goto :eof
:do25
if %unicode% EQU 0 (
echo Test 25 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 25 testout "UTF pattern conversion tests" -q
goto :eof
:do26
if %unicode% EQU 0 (
echo Test 26 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 26 testout "Unicode property tests (Compatible with Perl >= 5.38)" -q
if %jit% EQU 1 call :runsub 26 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do27
if %unicode% EQU 0 (
echo Test 27 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 27 testout "Auto-generated unicode property tests" -q
if %jit% EQU 1 call :runsub 27 testoutjit "Test with JIT Override" -q -jit
goto :eof
:conferror
@rem Prints hints for fixing a missing testdata folder or a missing pcre2test
@rem executable. It only writes text; the caller decides the exit code.
@echo.
@echo Either your build is incomplete or you have a configuration error.
@echo.
@echo If configured with cmake and executed via "make test" or the MSVC "RUN_TESTS"
@echo project, pcre2_test.bat defines variables and automatically calls RunTest.bat.
@echo For manual testing of all available features, after configuring with cmake
@echo and building, you can run the built pcre2_test.bat. For best results with
@echo cmake builds and tests avoid directories with full path names that include
@echo spaces for source or build.
@echo.
@echo Otherwise, if the build dir is in a subdir of the source dir, testdata needed
@echo for input and verification should be found automatically when (from the
@echo location of the built exes) you call RunTest.bat. By default RunTest.bat
@echo runs all tests compatible with the linked pcre2 library but it can be given
@echo a test number as an argument.
@echo.
@echo If the build dir is not under the source dir you can either copy your exes
@echo to the source folder or copy RunTest.bat and the testdata folder to the
@echo location of your built exes and then run RunTest.bat.
@echo.
goto :eof

View File

@@ -0,0 +1,44 @@
# Security policies
## Release security
The PCRE2 project provides source-only releases, with no binaries.
These source releases can be downloaded from the
[GitHub Releases](https://github.com/PCRE2Project/pcre2/releases) page. Each
release file is GPG-signed.
* Releases up to and including 10.44 are signed by Philip Hazel (GPG key:
<kbd>45F68D54BBE23FB3039B46E59766E084FB0F43D8</kbd>)
* Releases from 10.45 onwards will be signed by Nicholas Wilson (GPG key:
<kbd>A95536204A3BB489715231282A98E77EB6F24CA8</kbd>, cross-signed by Philip
Hazel's key for release continuity)
From release 10.45 onwards, the source code will additionally be provided via
Git checkout of the (GPG-signed) release tag.
Please contact the maintainers for any queries about release integrity or the
project's supply-chain.
## Reporting vulnerabilities
The PCRE2 project prioritises security. We appreciate third-party testing and
security research, and would be grateful if you could responsibly disclose your
findings to us. We will make every effort to acknowledge your contributions.
To report a security issue, please use the GitHub Security Advisory
["Report a Vulnerability"](https://github.com/PCRE2Project/pcre2/security/advisories/new)
tab. (Alternatively, if you prefer you may send a GPG-encrypted email to one of
the maintainers.)
### Timeline
As a very small volunteer team, we cannot guarantee rapid response, but would
aim to respond within 1 week, or perhaps 2 during holidays.
### Response procedure
PCRE2 has never previously made a rapid or embargoed release in response to a
security incident. We would work with security managers from trusted downstream
distributors, such as major Linux distributions, before disclosing the
vulnerability publicly.

View File

@@ -0,0 +1 @@
# See MODULE.bazel

45
libs/wxWidgets-3.3.1/3rdparty/pcre/autogen.sh vendored Executable file
View File

@@ -0,0 +1,45 @@
#!/bin/sh
# Running aclocal here first (as happened for a while) caused the macros that
# libtoolize puts in the m4 directory to be newer than the aclocal.m4 file that
# aclocal creates. This meant that the next "make" caused aclocal to be run
# again. Moving aclocal to after libtoolize does not seem to cause any
# problems, and it fixes this issue.
# GNU libtool is named differently on some systems. This code tries several
# variants like glibtoolize (MacOSX) and libtoolize1x (FreeBSD)
set +ex
# Try each known name for GNU libtoolize in turn and keep the first one whose
# "--version" call succeeds. Give up if none of them can be run.
echo "Looking for a version of libtoolize (which can have different names)..."
libtoolize=""
for candidate in glibtoolize libtoolize15 libtoolize14 libtoolize ; do
  if $candidate --version > /dev/null 2>&1 ; then
    libtoolize=$candidate
    echo "Found $candidate"
    break
  fi
  echo "Did not find $candidate"
done

if [ -z "$libtoolize" ]; then
  echo "Can't find libtoolize on your system"
  exit 1
fi
set -ex
$libtoolize -c -f
rm -rf autom4te.cache Makefile.in aclocal.m4
aclocal --force -I m4
autoconf -f -W all,no-obsolete
autoheader -f -W all
# Added no-portability to suppress automake 1.12's warning about the use
# of recursive variables.
automake -a -c -f -W all,no-portability
rm -rf autom4te.cache
exit 0
# end autogen.sh

View File

@@ -0,0 +1,173 @@
const std = @import("std");
/// Code unit width of the PCRE2 library variant to build, chosen with the
/// "code-unit-width" build option. The tag name is used verbatim as the
/// suffix of the library name ("pcre2-8", "pcre2-16", "pcre2-32") and as the
/// value of the PCRE2_CODE_UNIT_WIDTH C macro.
pub const CodeUnitWidth = enum {
@"8",
@"16",
@"32",
};
/// Build script entry point.
///
/// Declares three artifacts: the PCRE2 library for the selected code unit
/// width, the POSIX wrapper library (only when the width is 8), and the
/// pcre2test executable, which links against whichever libraries were built.
pub fn build(b: *std.Build) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Without an explicit "linkage" option, glibc targets get a dynamic
    // library and all other targets a static one.
    const default_linkage: std.builtin.LinkMode = if (target.result.isGnuLibC()) .dynamic else .static;
    const linkage = b.option(std.builtin.LinkMode, "linkage", "whether to statically or dynamically link the library") orelse default_linkage;
    const unit_width = b.option(CodeUnitWidth, "code-unit-width", "Sets the code unit width") orelse .@"8";

    // Values derived from the options that are needed in several places.
    const width_name = @tagName(unit_width);
    const is_windows = target.result.os.tag == .windows;
    const wants_static = linkage == .static;
    const is_8bit = unit_width == .@"8";

    // The generic header is copied into a generated directory as pcre2.h.
    const header_dir = b.addWriteFiles();
    const public_header = header_dir.addCopyFile(b.path("src/pcre2.h.generic"), "pcre2.h");

    // config.h is rendered from the CMake-style template.
    const config_h = b.addConfigHeader(
        .{
            .style = .{ .cmake = b.path("config-cmake.h.in") },
            .include_path = "config.h",
        },
        .{
            .HAVE_ASSERT_H = true,
            .HAVE_UNISTD_H = !is_windows,
            .HAVE_WINDOWS_H = is_windows,

            .HAVE_MEMMOVE = true,
            .HAVE_STRERROR = true,

            .SUPPORT_PCRE2_8 = is_8bit,
            .SUPPORT_PCRE2_16 = unit_width == .@"16",
            .SUPPORT_PCRE2_32 = unit_width == .@"32",
            .SUPPORT_UNICODE = true,

            .PCRE2_EXPORT = null,
            .PCRE2_LINK_SIZE = 2,
            .PCRE2_HEAP_LIMIT = 20000000,
            .PCRE2_MATCH_LIMIT = 10000000,
            .PCRE2_MATCH_LIMIT_DEPTH = "MATCH_LIMIT",
            .PCRE2_MAX_VARLOOKBEHIND = 255,
            .NEWLINE_DEFAULT = 2,
            .PCRE2_PARENS_NEST_LIMIT = 250,
        },
    );

    // ---- pcre2-8 / pcre2-16 / pcre2-32 library ----
    const core_lib = std.Build.Step.Compile.create(b, .{
        .name = b.fmt("pcre2-{s}", .{width_name}),
        .root_module = .{
            .target = target,
            .optimize = optimize,
            .link_libc = true,
        },
        .kind = .lib,
        .linkage = linkage,
    });

    core_lib.defineCMacro("HAVE_CONFIG_H", null);
    core_lib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", width_name);
    if (wants_static) {
        core_lib.defineCMacro("PCRE2_STATIC", null);
    }

    core_lib.addConfigHeader(config_h);
    core_lib.addIncludePath(header_dir.getDirectory());
    core_lib.addIncludePath(b.path("src"));

    // The distributed character tables are compiled under their usual name.
    core_lib.addCSourceFile(.{
        .file = b.addWriteFiles().addCopyFile(b.path("src/pcre2_chartables.c.dist"), "pcre2_chartables.c"),
    });

    core_lib.addCSourceFiles(.{
        .files = &.{
            "src/pcre2_auto_possess.c",
            "src/pcre2_chkdint.c",
            "src/pcre2_compile.c",
            "src/pcre2_compile_class.c",
            "src/pcre2_config.c",
            "src/pcre2_context.c",
            "src/pcre2_convert.c",
            "src/pcre2_dfa_match.c",
            "src/pcre2_error.c",
            "src/pcre2_extuni.c",
            "src/pcre2_find_bracket.c",
            "src/pcre2_jit_compile.c",
            "src/pcre2_maketables.c",
            "src/pcre2_match.c",
            "src/pcre2_match_data.c",
            "src/pcre2_newline.c",
            "src/pcre2_ord2utf.c",
            "src/pcre2_pattern_info.c",
            "src/pcre2_script_run.c",
            "src/pcre2_serialize.c",
            "src/pcre2_string_utils.c",
            "src/pcre2_study.c",
            "src/pcre2_substitute.c",
            "src/pcre2_substring.c",
            "src/pcre2_tables.c",
            "src/pcre2_ucd.c",
            "src/pcre2_valid_utf.c",
            "src/pcre2_xclass.c",
        },
    });

    core_lib.installHeader(public_header, "pcre2.h");
    b.installArtifact(core_lib);

    // ---- pcre2test executable ----
    // Created here so the POSIX library below can be linked into it; the rest
    // of its configuration follows after that library.
    const test_exe = b.addExecutable(.{
        .name = "pcre2test",
        .target = target,
        .optimize = optimize,
    });

    // ---- pcre2-posix library (8-bit builds only) ----
    if (is_8bit) {
        const posix_lib = std.Build.Step.Compile.create(b, .{
            .name = "pcre2-posix",
            .root_module = .{
                .target = target,
                .optimize = optimize,
                .link_libc = true,
            },
            .kind = .lib,
            .linkage = linkage,
        });

        posix_lib.defineCMacro("HAVE_CONFIG_H", null);
        posix_lib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", width_name);
        if (wants_static) {
            posix_lib.defineCMacro("PCRE2_STATIC", null);
        }

        posix_lib.addConfigHeader(config_h);
        posix_lib.addIncludePath(header_dir.getDirectory());
        posix_lib.addIncludePath(b.path("src"));

        posix_lib.addCSourceFiles(.{
            .files = &.{
                "src/pcre2posix.c",
            },
        });

        posix_lib.installHeader(b.path("src/pcre2posix.h"), "pcre2posix.h");
        b.installArtifact(posix_lib);
        test_exe.linkLibrary(posix_lib);
    }

    // ---- pcre2test executable, continued ----
    test_exe.defineCMacro("HAVE_CONFIG_H", null);

    test_exe.addConfigHeader(config_h);
    test_exe.addIncludePath(header_dir.getDirectory());
    test_exe.addIncludePath(b.path("src"));

    test_exe.addCSourceFile(.{
        .file = b.path("src/pcre2test.c"),
    });

    test_exe.linkLibC();
    test_exe.linkLibrary(core_lib);

    b.installArtifact(test_exe);
}

View File

@@ -0,0 +1,22 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,13 @@
# Editline library finder, modified from FindReadline.cmake (PH Feb 2012).
#
# A search is only performed when the include directory or the library is not
# already known; when both are set the package is reported as found directly.
if(NOT EDITLINE_INCLUDE_DIR OR NOT EDITLINE_LIBRARY)
  find_path(
    EDITLINE_INCLUDE_DIR
    readline.h
    PATH_SUFFIXES editline edit/readline
  )
  find_library(EDITLINE_LIBRARY NAMES edit)

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(
    Editline
    DEFAULT_MSG
    EDITLINE_INCLUDE_DIR
    EDITLINE_LIBRARY
  )

  mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
else()
  set(EDITLINE_FOUND TRUE)
endif()

View File

@@ -0,0 +1,27 @@
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
# --> BSD licensed
#
# GNU Readline library finder
#
# The search is skipped only when the include directory, the readline library
# and an ncurses library have all been supplied already.
if(NOT READLINE_INCLUDE_DIR OR NOT READLINE_LIBRARY OR NOT NCURSES_LIBRARY)
  find_path(READLINE_INCLUDE_DIR readline/readline.h /usr/include/readline)

  # 2008-04-22 The next clause used to read like this:
  #
  #  FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
  #  FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
  #  include(FindPackageHandleStandardArgs)
  #  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
  #
  # I was advised to modify it such that it will find an ncurses library if
  # required, but not if one was explicitly given, that is, it allows the
  # default to be overridden. PH
  find_library(READLINE_LIBRARY NAMES readline)

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(
    Readline
    DEFAULT_MSG
    READLINE_INCLUDE_DIR
    READLINE_LIBRARY
  )

  mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
else()
  set(READLINE_FOUND TRUE)
endif()

View File

@@ -0,0 +1,14 @@
# Version of this PCRE2 package. The patch component is fixed at 0; the major
# and minor placeholders are filled in when this file is generated.
set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@)
set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@)
set(PACKAGE_VERSION_PATCH 0)
set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0)

# The package satisfies a request when it is not older than the requested
# version and its major version is not greater than the requested major
# version. An identical version is additionally flagged as an exact match.
if(
  NOT (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
  AND NOT (PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
)
  set(PACKAGE_VERSION_COMPATIBLE TRUE)
  if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
    set(PACKAGE_VERSION_EXACT TRUE)
  endif()
else()
  set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()

View File

@@ -0,0 +1,168 @@
# pcre2-config.cmake
# ----------------
#
# Finds the PCRE2 library. To set the starting search path, define PCRE2_ROOT.
#
# Static vs. shared
# -----------------
# To make use of the static library instead of the shared one, one needs
# to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package.
# Example:
# set(PCRE2_USE_STATIC_LIBS ON)
# find_package(PCRE2 CONFIG COMPONENTS 8BIT)
#
# This will define the following variables:
#
# PCRE2_FOUND - True if the system has the PCRE2 library.
# PCRE2_VERSION - The version of the PCRE2 library which was found.
#
# and the following imported targets:
#
# PCRE2::8BIT - The 8 bit PCRE2 library.
# PCRE2::16BIT - The 16 bit PCRE2 library.
# PCRE2::32BIT - The 32 bit PCRE2 library.
# PCRE2::POSIX - The POSIX PCRE2 library.
# Placeholders filled in when this file is generated from its template.
set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@)
set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@)

find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory")

# Base names of the libraries. With MSVC the static libraries carry a
# "-static" suffix in their names.
if(PCRE2_USE_STATIC_LIBS AND MSVC)
  set(PCRE2_8BIT_NAME pcre2-8-static)
  set(PCRE2_16BIT_NAME pcre2-16-static)
  set(PCRE2_32BIT_NAME pcre2-32-static)
  set(PCRE2_POSIX_NAME pcre2-posix-static)
else()
  set(PCRE2_8BIT_NAME pcre2-8)
  set(PCRE2_16BIT_NAME pcre2-16)
  set(PCRE2_32BIT_NAME pcre2-32)
  set(PCRE2_POSIX_NAME pcre2-posix)
endif()

# File name prefix and suffix that are wrapped around the base names.
if(PCRE2_USE_STATIC_LIBS)
  set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
  set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  # MinGW builds with a non-standard prefix use no prefix at all.
  if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
    set(PCRE2_PREFIX "")
  else()
    set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
  endif()

  # MinGW builds with a non-standard suffix end in "-0.dll"; with MSVC the
  # static library suffix is used for the shared case as well.
  if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
    set(PCRE2_SUFFIX "-0.dll")
  elseif(MSVC)
    set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
  else()
    set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
endif()
# Locate each library by its full file name (prefix + base name + suffix).
# Every search also accepts a variant with a trailing "d" on the base name
# (presumably a debug-postfixed build -- confirm against the build scripts).
find_library(
PCRE2_8BIT_LIBRARY
NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX}
DOC "8 bit PCRE2 library"
)
find_library(
PCRE2_16BIT_LIBRARY
NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX}
DOC "16 bit PCRE2 library"
)
find_library(
PCRE2_32BIT_LIBRARY
NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX}
DOC "32 bit PCRE2 library"
)
find_library(
PCRE2_POSIX_LIBRARY
NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX}
DOC "8 bit POSIX PCRE2 library"
)
# The naming helpers are only needed for the searches above; drop them so they
# do not remain set in the scope that loaded this file.
unset(PCRE2_NON_STANDARD_LIB_PREFIX)
unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
unset(PCRE2_8BIT_NAME)
unset(PCRE2_16BIT_NAME)
unset(PCRE2_32BIT_NAME)
unset(PCRE2_POSIX_NAME)
# The version is only reported once the header directory has been located.
if(PCRE2_INCLUDE_DIR)
  set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0")
endif()

# Flag every component whose library was located.
foreach(component 8BIT 16BIT 32BIT POSIX)
  if(PCRE2_${component}_LIBRARY)
    set(PCRE2_${component}_FOUND TRUE)
  endif()
endforeach()
# A request without any component is rejected outright.
list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS)
if(PCRE2_NCOMPONENTS EQUAL 0)
  message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.")
endif()
unset(PCRE2_NCOMPONENTS)

# The POSIX component may only be requested together with the 8BIT component.
# Record which of the two appear in the request.
set(PCRE2_POSIX_COMPONENT FALSE)
set(PCRE2_8BIT_COMPONENT FALSE)
foreach(component ${PCRE2_FIND_COMPONENTS})
  if(component STREQUAL "POSIX")
    set(PCRE2_POSIX_COMPONENT TRUE)
  elseif(component STREQUAL "8BIT")
    set(PCRE2_8BIT_COMPONENT TRUE)
  endif()
endforeach()

if(PCRE2_POSIX_COMPONENT)
  if(NOT PCRE2_8BIT_COMPONENT)
    message(
      FATAL_ERROR
      "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component."
    )
  endif()
endif()

unset(PCRE2_8BIT_COMPONENT)
unset(PCRE2_POSIX_COMPONENT)

# Let the standard helper decide the overall result, checking the include
# directory and the PCRE2_<component>_FOUND flag of each requested component.
include(FindPackageHandleStandardArgs)
set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}")
find_package_handle_standard_args(
  PCRE2
  FOUND_VAR PCRE2_FOUND
  REQUIRED_VARS PCRE2_INCLUDE_DIR
  HANDLE_COMPONENTS
  VERSION_VAR PCRE2_VERSION
  CONFIG_MODE
)
# Create one imported target per requested component and collect the library
# paths in PCRE2_LIBRARIES.
set(PCRE2_LIBRARIES)
if(PCRE2_FOUND)
  foreach(component ${PCRE2_FIND_COMPONENTS})
    # This file can be loaded more than once in the same directory scope (for
    # example when two dependencies each call find_package(PCRE2 ...)).
    # add_library() fails if the imported target already exists, so the target
    # is only created and configured on the first pass.
    if(NOT TARGET PCRE2::${component})
      if(PCRE2_USE_STATIC_LIBS)
        add_library(PCRE2::${component} STATIC IMPORTED)
        # Consumers of the static library must compile with PCRE2_STATIC.
        target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC)
      else()
        add_library(PCRE2::${component} SHARED IMPORTED)
      endif()

      set_target_properties(
        PCRE2::${component}
        PROPERTIES
          IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
          IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
          INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
      )

      # The POSIX wrapper is linked together with the 8-bit library.
      if(component STREQUAL "POSIX")
        set_target_properties(
          PCRE2::${component}
          PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT"
        )
      endif()
    endif()

    # The result variables are refreshed on every pass, including repeated ones.
    list(APPEND PCRE2_LIBRARIES ${PCRE2_${component}_LIBRARY})
    mark_as_advanced(PCRE2_${component}_LIBRARY)
  endforeach()
endif()

mark_as_advanced(PCRE2_INCLUDE_DIR)

View File

@@ -0,0 +1,58 @@
/* config.h for CMake builds */
/* This file is a template. The #cmakedefine lines and the placeholders written
between at-signs are resolved by the tool that generates config.h from it. */
/* HAVE_* switches (presumably the results of platform feature checks). */
#cmakedefine HAVE_ASSERT_H 1
#cmakedefine HAVE_BUILTIN_ASSUME 1
#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
#cmakedefine HAVE_DIRENT_H 1
#cmakedefine HAVE_SYS_STAT_H 1
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine HAVE_WINDOWS_H 1
#cmakedefine HAVE_BCOPY 1
#cmakedefine HAVE_MEMFD_CREATE 1
#cmakedefine HAVE_MEMMOVE 1
#cmakedefine HAVE_SECURE_GETENV 1
#cmakedefine HAVE_STRERROR 1
/* Code unit widths for which library support is enabled. */
#cmakedefine SUPPORT_PCRE2_8 1
#cmakedefine SUPPORT_PCRE2_16 1
#cmakedefine SUPPORT_PCRE2_32 1
/* On/off build options. */
#cmakedefine DISABLE_PERCENT_ZT 1
#cmakedefine SUPPORT_LIBBZ2 1
#cmakedefine SUPPORT_LIBEDIT 1
#cmakedefine SUPPORT_LIBREADLINE 1
#cmakedefine SUPPORT_LIBZ 1
#cmakedefine SUPPORT_JIT 1
#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
#cmakedefine SUPPORT_PCRE2GREP_JIT 1
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
#cmakedefine SUPPORT_UNICODE 1
#cmakedefine SUPPORT_VALGRIND 1
#cmakedefine BSR_ANYCRLF 1
#cmakedefine EBCDIC 1
#cmakedefine EBCDIC_NL25 1
#cmakedefine HEAP_MATCH_RECURSE 1
#cmakedefine NEVER_BACKSLASH_C 1
/* Settings whose values are substituted from build variables. */
#define PCRE2_EXPORT @PCRE2_EXPORT@
#define LINK_SIZE @PCRE2_LINK_SIZE@
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@
/* Settings with fixed values that are not taken from the build. */
#define MAX_NAME_SIZE 128
#define MAX_NAME_COUNT 10000
/* end config.h for CMake builds */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,442 @@
Building PCRE2 without using autotools
--------------------------------------
This document contains the following sections:
General
Generic instructions for the PCRE2 C libraries
Stack size in Windows environments
Linking programs in Windows environments
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE2 on Windows with CMake
Building PCRE2 on Windows with Visual Studio
Testing with RunTest.bat
Building PCRE2 on native z/OS and z/VM
Building PCRE2 under VMS
GENERAL
The source of the PCRE2 libraries consists entirely of code written in Standard
C, and so should compile successfully on any system that has a Standard C
compiler and library.
The PCRE2 distribution includes a "configure" file for use by the
configure/make (autotools) build system, as found in many Unix-like
environments. The README file contains information about the options for
"configure".
There is also support for CMake, which some users prefer, especially in Windows
environments, though it can also be run in Unix-like environments. See the
section entitled "Building PCRE2 on Windows with CMake" below.
Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs
under the names src/config.h.generic and src/pcre2.h.generic. These are
provided for those who build PCRE2 without using "configure" or CMake. If you
use "configure" or CMake, the .generic versions are not used.
GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARIES
There are three possible PCRE2 libraries, each handling data with a specific
code unit width: 8, 16, or 32 bits. You can build any combination of them. The
following are generic instructions for building a PCRE2 C library "by hand". If
you are going to use CMake, this section does not apply to you; you can skip
ahead to the CMake section. Note that the settings concerned with 8-bit,
16-bit, and 32-bit code units relate to the type of data string that PCRE2
processes. They are NOT referring to the underlying operating system bit width.
You do not have to do anything special to compile in a 64-bit environment, for
example.
(1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
macro settings that it contains to whatever is appropriate for your
environment. In particular, you can alter the definition of the
NEWLINE_DEFAULT macro to specify what character(s) you want to be interpreted as line
terminators by default. You need to #define at least one of
SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, or SUPPORT_PCRE2_32, depending on which
libraries you are going to build. You must set all that apply.
When you subsequently compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
An alternative approach is not to edit src/config.h, but to use -D on the
compiler command line to make any changes that you need to the
configuration options. In this case -DHAVE_CONFIG_H must not be set.
NOTE: There have been occasions when the way in which certain parameters
in src/config.h are used has changed between releases. (In the
configure/make world, this is handled automatically.) When upgrading to a
new release, you are strongly advised to review src/config.h.generic
before re-using what you had previously.
Note also that the src/config.h.generic file is created from a config.h
that was generated by Autotools, which automatically includes settings of
a number of macros that are not actually used by PCRE2 (for example,
HAVE_DLFCN_H).
(2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
(3) EITHER:
Copy or rename file src/pcre2_chartables.c.dist as
src/pcre2_chartables.c.
OR:
Compile src/pcre2_dftables.c as a stand-alone program (using
-DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
the single argument "src/pcre2_chartables.c". This generates a set of
standard character tables and writes them to that file. The tables are
generated using the default C locale for your system. If you want to use
a locale that is specified by LC_xxx environment variables, add the -L
option to the pcre2_dftables command. You must use this method if you
are building on a system that uses EBCDIC code.
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
specify alternative tables at run time.
(4) For a library that supports 8-bit code units in the character strings that
it processes, compile the following source files from the src directory,
setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set
-DHAVE_CONFIG_H if you have set up src/config.h with your configuration,
or else use other -D settings to change the configuration as required.
pcre2_auto_possess.c
pcre2_chkdint.c
pcre2_chartables.c
pcre2_compile.c
pcre2_compile_class.c
pcre2_config.c
pcre2_context.c
pcre2_convert.c
pcre2_dfa_match.c
pcre2_error.c
pcre2_extuni.c
pcre2_find_bracket.c
pcre2_jit_compile.c
pcre2_maketables.c
pcre2_match.c
pcre2_match_data.c
pcre2_newline.c
pcre2_ord2utf.c
pcre2_pattern_info.c
pcre2_script_run.c
pcre2_serialize.c
pcre2_string_utils.c
pcre2_study.c
pcre2_substitute.c
pcre2_substring.c
pcre2_tables.c
pcre2_ucd.c
pcre2_valid_utf.c
pcre2_xclass.c
Make sure that you include -I. in the compiler command (or equivalent for
an unusual compiler) so that all included PCRE2 header files are first
sought in the src directory under the current directory. Otherwise you run
the risk of picking up a previously-installed file from somewhere else.
Note that you must compile pcre2_jit_compile.c, even if you have not
defined SUPPORT_JIT in src/config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre2_jit_compile.c #includes other files from the sljit dependency,
all of whose names begin with "sljit". It also #includes
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
those yourself.
Note also that the pcre2_fuzzsupport.c file contains special code that is
useful to those who want to run fuzzing tests on the PCRE2 library. Unless
you are doing that, you can ignore it.
(5) Now link all the compiled code into an object library in whichever form
your system keeps such libraries. This is the PCRE2 C 8-bit library,
typically called something like libpcre2-8. If your system has static and
shared libraries, you may have to do this once for each type.
(6) If you want to build a library that supports 16-bit or 32-bit code units,
set 16 or 32 as the value of -DPCRE2_CODE_UNIT_WIDTH when obeying step 4
above. If you want to build more than one PCRE2 library, repeat steps 4
and 5 as necessary.
(7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the src/pcre2posix.h file and then
compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
library. If targeting a DLL in Windows, make sure to include
-DPCRE2POSIX_SHARED with your compiler flags.
(8) The pcre2test program can be linked with any combination of the 8-bit,
16-bit and 32-bit libraries (depending on what you specified in
src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
appropriate library/ies. If you compiled an 8-bit library, pcre2test also
needs the pcre2posix wrapper library.
(9) Run pcre2test on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE2"
in the README file. If you compiled more than one of the 8-bit, 16-bit and
32-bit libraries, you need to run pcre2test with the -16 option to do
16-bit tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
For example, test 4 is for Unicode support, and will not run if you have
built PCRE2 without it. See the comments at the start of each testinput
file. If you have a suitable Unix-like shell, the RunTest script will run
the appropriate tests for you. The command "RunTest list" will output a
list of all the tests.
Note that the supplied files are in Unix format, with just LF characters
as line terminators. You may need to edit them to change this if your
system uses a different convention.
(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested
by running pcre2test with the -jit option. This is done automatically by
the RunTest script. You might also like to build and run the freestanding
JIT test program, src/pcre2_jit_test.c.
(11) The pcre2test program tests the POSIX wrapper library, but there is also a
freestanding test program in src/pcre2posix_test.c. It must be linked with
both the pcre2posix library and the 8-bit PCRE2 library.
(12) If you want to use the pcre2grep command, compile and link
src/pcre2grep.c; it uses only the 8-bit PCRE2 library (it does not need
the pcre2posix library). If you have built the PCRE2 library with JIT
support by defining SUPPORT_JIT in src/config.h, you can also define
SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless
it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without
defining SUPPORT_JIT, pcre2grep does not try to make use of JIT.
STACK SIZE IN WINDOWS ENVIRONMENTS
Prior to release 10.30 the default system stack size of 1MiB in some Windows
environments caused issues with some tests. This should no longer be the case
for 10.30 and later releases.
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
If you want to statically link a program against a PCRE2 library in the form of
a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h.
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
It is possible to compile programs to use different calling conventions using
MSVC. Search the web for "calling conventions" for more information. To make it
easier to change the calling convention for the exported functions in a
PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
not set, it defaults to empty; the default calling convention is then used
(which is what is wanted most of the time).
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE")
There are two ways of building PCRE2 using the "configure, make, make install"
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
the same thing; they are completely different from each other. There is also
support for building using CMake, which some users find a more straightforward
way of building PCRE2 under Windows.
The MinGW home page (http://www.mingw.org/) says this:
MinGW: A collection of freely available and freely distributable Windows
specific header files and import libraries combined with GNU toolsets that
allow one to produce native Windows programs that do not rely on any
3rd-party C runtime DLLs.
The Cygwin home page (http://www.cygwin.com/) says this:
Cygwin is a Linux-like environment for Windows. It consists of two parts:
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
substantial Linux API functionality
. A collection of tools which provide Linux look and feel.
On both MinGW and Cygwin, PCRE2 should build correctly using:
./configure && make && make install
This should create two libraries called libpcre2-8 and libpcre2-posix. These
are independent libraries: when you link with libpcre2-posix you must also link
with libpcre2-8, which contains the basic functions.
Using Cygwin's compiler generates libraries and executables that depend on
cygwin1.dll. If a library that is generated this way is distributed,
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
licence, this forces not only PCRE2 to be under the GPL, but also the entire
application. A distributor who wants to keep their own code proprietary must
purchase an appropriate Cygwin licence.
MinGW has no such restrictions. The MinGW compiler generates a library or
executable that can run standalone on Windows without any third party dll or
licensing issues.
But there is more complication:
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
gcc and MinGW's gcc). So, a user can:
. Build native binaries by using MinGW or by getting Cygwin and using
-mno-cygwin.
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
compiler flags.
The test files that are supplied with PCRE2 are in UNIX format, with LF
characters as line terminators. Unless your PCRE2 library uses a default
newline option that includes LF as a valid newline, it may be necessary to
change the line terminators in the test files to get some of the tests to work.
BUILDING PCRE2 ON WINDOWS WITH CMAKE
CMake is an alternative configuration facility that can be used instead of
"configure". CMake creates project files (make files, solution files, etc.)
tailored to numerous development environments, including Visual Studio,
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
If you are using CMake and encounter errors, deleting the CMake cache and
restarting from a fresh build may fix the error. In the CMake GUI, the cache can
be deleted by selecting "File > Delete Cache"; or the folder "CMakeCache" can
be deleted.
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
4. Run CMake.
- Using the CLI, simply run `cmake ..` inside the `build/` directory. You can
use the `ccmake` ncurses GUI to select and configure PCRE2 features.
- Using the CMake GUI:
a) Run cmake-gui from the Shell environment of your build tool, for
example, Msys for Msys/MinGW or Visual Studio Command Prompt for
VC/VC++.
b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
c) Press the "Configure" button.
d) Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
e) The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
f) Press "Configure" again. The adjacent "Generate" button should now be
active.
g) Press "Generate".
5. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
Regardless of build system used, `cmake --build .` will build it.
6. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
Regardless of build system used, `ctest` will run the tests.
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
The code currently cannot be compiled without an inttypes.h header, which is
available only with Visual Studio 2013 or newer. However, this portable and
permissively-licensed implementation of the stdint.h header could be used as an
alternative:
http://www.azillionmonkeys.com/qed/pstdint.h
Just rename it and drop it into the top level of the build tree.
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
RUN_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending
on your configuration options, possibly other test programs) in the build
directory. The pcre2_test.bat script runs RunTest.bat with correct source and
exe paths.
For manual testing with RunTest.bat, provided the build dir is a subdirectory
of the source directory: Open command shell window. Chdir to the location
of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
To run only a particular test with RunTest.Bat provide a test number argument.
Otherwise:
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
have been created.
2. Edit RunTest.bat to identify the full or relative location of
the pcre2 source (in which the testdata folder resides), e.g.:
set srcdir=C:\pcre2\pcre2-10.00
3. In a Windows command environment, chdir to the location of your bat and
exe programs.
4. Run RunTest.bat. Test outputs will automatically be compared to expected
results, and discrepancies will be identified in the console output.
To independently test the just-in-time compiler, run pcre2_jit_test.exe.
BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
applications can be supported through UNIX System Services, and in such an
environment it should be possible to build PCRE2 in the same way as in other
systems, with the EBCDIC related configuration settings, but it is not known if
anybody has tried this.
In native z/OS (without UNIX System Services) and in z/VM, special ports are
required. For details, please see file 939 on this web site:
http://www.cbttape.org
Everything in that location, source and executable, is in EBCDIC and native
z/OS file formats. The port provides an API for LE languages such as COBOL and
for the z/OS and z/VM versions of the Rexx languages.
BUILDING PCRE2 UNDER VMS
Alexey Chuphin has contributed some auxiliary files for building PCRE2 under
OpenVMS. They are in the "vms" directory in the distribution tarball. Please
read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep
programs contain some VMS-specific code.
==============================
Last updated: 26 December 2024
==============================

View File

@@ -0,0 +1,970 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
API. Since its initial release in 2015, there has been further development of
the code and it now differs from PCRE1 in more than just the API. There are new
features, and the internals have been improved. The original PCRE1 library is
now obsolete and no longer maintained. The latest release of PCRE2 is available
in .tar.gz, .tar.bz2, or .zip form from this GitHub repository:
https://github.com/PCRE2Project/pcre2/releases
There is a mailing list for discussion about the development of PCRE2 at
pcre2-dev@googlegroups.com. You can subscribe by sending an email to
pcre2-dev+subscribe@googlegroups.com.
You can access the archives and also subscribe or manage your subscription
here:
https://groups.google.com/g/pcre2-dev
Please read the NEWS file if you are upgrading from a previous release. The
contents of this README file are:
The PCRE2 APIs
Documentation for PCRE2
Building PCRE2 on non-Unix-like systems
Building PCRE2 without using autotools
Building PCRE2 using autotools
Retrieving configuration information
Shared libraries
Cross-compiling using autotools
Making new tarballs
Testing PCRE2
Character tables
File manifest
The PCRE2 APIs
--------------
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there
are no C++ wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
library that are based on the POSIX regular expression API (see the pcre2posix
man page). These are built into a library called libpcre2-posix. Note that this
just provides a POSIX calling interface to PCRE2; the regular expressions
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
and does not give full access to all of PCRE2's facilities.
The header file for the POSIX-style functions is called pcre2posix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE2 with
an existing program that uses the POSIX API, pcre2posix.h will have to be
renamed or pointed at by a link (or the program modified, of course). See the
pcre2posix documentation for more details.
Documentation for PCRE2
-----------------------
If you install PCRE2 in the normal way on a Unix-like system, you will end up
with a set of man pages whose names all start with "pcre2". The one that is
just called "pcre2" lists all the others. In addition to these man pages, the
PCRE2 documentation is supplied in two other forms:
1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and
doc/pcre2test.txt in the source distribution. The first of these is a
concatenation of the text forms of all the section 3 man pages except the
listing of pcre2demo.c and those that summarize individual functions. The
other two are the text forms of the section 1 man pages for the pcre2grep
and pcre2test commands. These text forms are provided for ease of scanning
with text editors or similar tools. They are installed in
<prefix>/share/doc/pcre2, where <prefix> is the installation prefix
(defaulting to /usr/local).
2. A set of files containing all the documentation in HTML form, hyperlinked
in various ways, and rooted in a file called index.html, is distributed in
doc/html and installed in <prefix>/share/doc/pcre2/html.
Building PCRE2 on non-Unix-like systems
---------------------------------------
For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if
your system supports the use of "configure" and "make" you may be able to build
PCRE2 using autotools in the same way as for many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
PCRE2 has been compiled on many different operating systems. It should be
straightforward to build PCRE2 on any system that has a Standard C compiler and
library, because it uses only Standard C functions.
Building PCRE2 without using autotools
--------------------------------------
The use of autotools (in particular, libtool) is problematic in some
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
file for ways of building PCRE2 without using autotools.
Building PCRE2 using autotools
------------------------------
The following instructions assume the use of the widely used "configure; make;
make install" (autotools) process.
If you have downloaded and unpacked a PCRE2 release tarball, run the
"configure" command from the PCRE2 directory, with your current directory set
to the directory where you want the files to be created. This command is a
standard GNU "autoconf" configuration script, for which generic instructions
are supplied in the file INSTALL.
The files in the GitHub repository do not contain "configure". If you have
downloaded the PCRE2 source files from GitHub, before you can run "configure"
you must run the shell script called autogen.sh. This runs a number of
autotools to create a "configure" script (you must of course have the autotools
commands installed in order to do this).
Most commonly, people build PCRE2 within its own distribution directory, and in
this case, on many systems, just running "./configure" is sufficient. However,
the usual methods of changing standard defaults are available. For example:
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
This command specifies that the C compiler should be run with the flags '-O2
-Wall' instead of the default, and that "make install" should install PCRE2
under /opt/local instead of the default /usr/local.
If you want to build in a different directory, just run "configure" with that
directory as current. For example, suppose you have unpacked the PCRE2 source
into /source/pcre2/pcre2-xxx, but you want to build it in
/build/pcre2/pcre2-xxx:
cd /build/pcre2/pcre2-xxx
/source/pcre2/pcre2-xxx/configure
PCRE2 is written in C and is normally compiled as a C library. However, it is
possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this.
There are some optional features that can be included or omitted from the PCRE2
library. They are also documented in the pcre2build man page.
. By default, both shared and static libraries are built. You can change this
by adding one of these options to the "configure" command:
--disable-shared
--disable-static
Setting --disable-shared ensures that PCRE2 libraries are built as static
libraries. The binaries that are then created as part of the build process
(for example, pcre2test and pcre2grep) are linked statically with one or more
PCRE2 libraries, but may also be dynamically linked with other libraries such
as libc. If you want these binaries to be fully statically linked, you can
set LDFLAGS like this:
LDFLAGS=--static ./configure --disable-shared
Note the two hyphens in --static. Of course, this works only if static
versions of all the relevant libraries are available for linking. See also
"Shared libraries" below.
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
the "configure" command, the 16-bit library is also built. If you add
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
to disable building the 8-bit library.
. If you want to include support for just-in-time (JIT) compiling, which can
give large performance improvements on certain platforms, add --enable-jit to
the "configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error. If in doubt, use --enable-jit=auto, which
enables JIT only if the current hardware is supported.
. If you are enabling JIT in an SELinux environment, you may also want to add
--enable-jit-sealloc, which enables the use of an executable memory allocator
that is compatible with SELinux. Warning: this allocator is experimental!
It does not support the fork() operation and may crash when no disk space is
available. This option has no effect if JIT is disabled.
. If you do not want to make use of the default support for UTF-8 Unicode
character strings in the 8-bit library, UTF-16 Unicode character strings in
the 16-bit library, or UTF-32 Unicode character strings in the 32-bit
library, you can add --disable-unicode to the "configure" command. This
reduces the size of the libraries. It is not possible to configure one
library with Unicode support, and another without, in the same configuration.
It is also not possible to use --enable-ebcdic (see below) with Unicode
support, so if this option is set, you must also use --disable-unicode.
When Unicode support is available, the use of a UTF encoding still has to be
enabled by setting the PCRE2_UTF option at run time or starting a pattern
with (*UTF). When PCRE2 is compiled with Unicode support, its input can be
only ASCII or UTF-8/16/32, even when running on EBCDIC platforms.
As well as supporting UTF strings, Unicode support includes support for the
\P, \p, and \X sequences that recognize Unicode character properties.
However, only a subset of Unicode properties are supported; see the
pcre2pattern man page for details. Escape sequences such as \d and \w in
patterns do not by default make use of Unicode properties, but can be made to
do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
character as indicating the end of a line. Whatever you specify at build time
is the default; the caller of PCRE2 can change the selection at run time. The
default newline indicator is a single LF character (the Unix standard). You
can specify the default newline indicator by adding --enable-newline-is-cr,
--enable-newline-is-lf, --enable-newline-is-crlf,
--enable-newline-is-anycrlf, --enable-newline-is-any, or
--enable-newline-is-nul to the "configure" command, respectively.
. By default, the sequence \R in a pattern matches any Unicode line ending
sequence. This is independent of the option specifying what PCRE2 considers
to be the end of a line (see above). However, the caller of PCRE2 can
restrict \R to match only CR, LF, or CRLF. You can make this the default by
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
. In a pattern, the escape sequence \C matches a single code unit, even in a
UTF mode. This can be dangerous because it breaks up multi-code-unit
characters. You can build PCRE2 with the use of \C permanently locked out by
adding --enable-never-backslash-C (note the upper case C) to the "configure"
command. When \C is allowed by the library, individual applications can lock
it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option.
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
pattern. This limits the amount of system stack that a pattern uses when it
is compiled. The default is 250, but you can change it by setting, for
example,
--with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of computing resource
it uses when matching a pattern. If the limit is exceeded during a match, the
match fails. The default is ten million. You can change the default by
setting, for example,
--with-match-limit=500000
on the "configure" command. This is just the default; individual calls to
pcre2_match() or pcre2_dfa_match() can supply their own value. There is more
discussion in the pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking
(pcre2_match()) or nested function calls (pcre2_dfa_match()) during a
matching process, which indirectly limits the amount of heap memory that is
used, and in the case of pcre2_dfa_match() the amount of stack as well. This
counter also has a default of ten million, which is essentially "unlimited".
You can change the default by setting, for example,
--with-match-limit-depth=5000
There is more discussion in the pcre2api man page (search for
pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
the pcre2_match() and pcre2_dfa_match() interpreters:
--with-heap-limit=500
The units are kibibytes (units of 1024 bytes). This limit does not apply when
the JIT optimization (which has its own memory control features) is used.
There is more discussion on the pcre2api man page (search for
pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around
64 kibibytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets
to different parts of the compiled pattern. In the 16-bit library,
--with-link-size=3 is the same as --with-link-size=4, which (in both
libraries) uses four-byte offsets. Increasing the internal link size reduces
performance in the 8-bit and 16-bit libraries. In the 32-bit library, the
link size setting is ignored, as 4-byte offsets are always used.
. Lookbehind assertions in which one or more branches can match a variable
number of characters are supported only if there is a maximum matching length
for each top-level branch. There is a limit to this maximum that defaults to
255 characters. You can alter this default by a setting such as
--with-max-varlookbehind=100
The limit can be changed at runtime by calling pcre2_set_max_varlookbehind().
Lookbehind assertions in which every branch matches a fixed number of
characters (not necessarily all the same) are not constrained by this limit.
. For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, it uses a set of
tables for ASCII encoding that is part of the distribution. If you specify
--enable-rebuild-chartables
a program called pcre2_dftables is compiled and run in the default C locale
when you run "make". It builds a source file called pcre2_chartables.c. If
you do not specify this option, pcre2_chartables.c is created as a copy of
pcre2_chartables.c.dist. See "Character tables" below for further
information.
. It is possible to compile PCRE2 for use on systems that use EBCDIC as their
character code (as opposed to ASCII/Unicode) by specifying
--enable-ebcdic --disable-unicode
This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
. If you specify --enable-debug, additional debugging code is included in the
build. This option is intended for use by the PCRE2 maintainers.
. In environments where valgrind is installed, if you specify
--enable-valgrind
PCRE2 will use valgrind annotations to mark certain memory regions as
unaddressable. This allows it to detect invalid memory accesses, and is
mostly useful for debugging PCRE2 itself.
. In environments where the gcc compiler is used and lcov is installed, if you
specify
--enable-coverage
the build process implements a code coverage report for the test suite. The
report is generated by running "make coverage". If ccache is installed on
your system, it must be disabled when building PCRE2 for coverage reporting.
You can do this by setting the environment variable CCACHE_DISABLE=1 before
running "make" to build PCRE2. There is more information about coverage
reporting in the "pcre2build" documentation.
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
you add --disable-pcre2grep-jit to the "configure" command.
. There is support for calling external programs during matching in the
pcre2grep command, using PCRE2's callout facility with string arguments. This
support can be disabled by adding --disable-pcre2grep-callout to the
"configure" command. There are two kinds of callout: one that generates
output from inbuilt code, and another that calls an external program. The
latter has special support for Windows and VMS; otherwise it assumes the
existence of the fork() function. This facility can be disabled by adding
--disable-pcre2grep-callout-fork to the "configure" command.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
specifying one or both of
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
Of course, the relevant libraries must be installed on your system.
. The default starting size (in bytes) of the internal buffer used by pcre2grep
can be set by, for example:
--with-pcre2grep-bufsize=51200
The value must be a plain integer. The default is 20480. The amount of memory
used by pcre2grep is actually three times this number, to allow for "before"
and "after" lines. If very long lines are encountered, the buffer is
automatically enlarged, up to a fixed maximum size.
. The default maximum size of pcre2grep's internal buffer can be set by, for
example:
--with-pcre2grep-max-bufsize=2097152
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
whichever is the larger.
. It is possible to compile pcre2test so that it links with the libreadline
or libedit libraries, by specifying, respectively,
--enable-pcre2test-libreadline or --enable-pcre2test-libedit
If this is done, when pcre2test's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licensed, so if you distribute a binary of
pcre2test linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
Enabling libreadline causes the -lreadline option to be added to the
pcre2test build. In many operating environments with a system-installed
readline library this is sufficient. However, in some environments (e.g. if
an unmodified distribution version of readline is in use), it may be
necessary to specify something like LIBS="-lncurses" as well. This is
because, to quote the readline INSTALL, "Readline uses the termcap functions,
but does not link with the termcap or curses library itself, allowing
applications which link with readline the option to choose an appropriate
library." If you get error messages about missing functions tgetstr, tgetent,
tputs, tgetflag, or tgoto, this is the problem, and linking with the ncurses
library should fix it.
. The C99 standard defines formatting modifiers z and t for size_t and
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
environments other than Microsoft Visual Studio versions earlier than 2013
when __STDC_VERSION__ is defined and has a value greater than or equal to
199901L (indicating C99). However, there is at least one environment that
claims to be C99 but does not support these modifiers. If
--disable-percent-zt is specified, no use is made of the z or t modifiers.
Instead of %td or %zu, %lu is used, with a cast for size_t values.
. There is a special option called --enable-fuzz-support for use by people who
want to run fuzzing tests on PCRE2. If set, it causes an extra library
called libpcre2-fuzzsupport.a to be built, but not installed. This contains
a single function called LLVMFuzzerTestOneInput() whose arguments are a
pointer to a string and the length of the string. When called, this function
tries to compile the string as a pattern, and if that succeeds, to match
it. This is done both with no options and with some random option bits that
are generated from the string. Setting --enable-fuzz-support also causes an
executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally
run under valgrind or used when PCRE2 is compiled with address sanitizing
enabled. It calls the fuzzing function and outputs information about what it
is doing. The input strings are specified by arguments: if an argument
starts with "=" the rest of it is a literal input string. Otherwise, it is
assumed to be a file name, and the contents of the file are the test string.
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
which caused pcre2_match() to use individual blocks on the heap for
backtracking instead of recursive function calls (which use the stack). This
is now obsolete because pcre2_match() was refactored always to use the heap
(in a much more efficient way than before). This option is retained for
backwards compatibility, but has no effect other than to output a warning.
The "configure" script builds the following files for the basic C library:
. Makefile the makefile that builds the library
. src/config.h build-time configuration options for the library
. src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure"
. libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc )
. libpcre2-posix.pc )
. libtool script that builds shared and/or static libraries
Versions of config.h and pcre2.h are distributed in the src directory of PCRE2
tarballs under the names config.h.generic and pcre2.h.generic. These are
provided for those who have to build PCRE2 without using "configure" or CMake.
If you use "configure" or CMake, the .generic versions are not used.
The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs.
Once "configure" has run, you can run "make". This builds whichever of the
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
program called pcre2test. If you enabled JIT support with --enable-jit, another
test program called pcre2_jit_test is built as well. If the 8-bit library is
built, libpcre2-posix, pcre2posix_test, and the pcre2grep command are also
built. Running "make" with the -j option may speed up compilation on
multiprocessor systems.
The command "make check" runs all the appropriate tests. Details of the PCRE2
tests are given below in a separate section of this document. The -j option of
"make" can also be used when running the tests.
You can use "make install" to install PCRE2 into live directories on your
system. The following are installed (file names are all relative to the
<prefix> that is set when "configure" is run):
Commands (bin):
pcre2test
pcre2grep (if 8-bit support is enabled)
pcre2-config
Libraries (lib):
libpcre2-8 (if 8-bit support is enabled)
libpcre2-16 (if 16-bit support is enabled)
libpcre2-32 (if 32-bit support is enabled)
libpcre2-posix (if 8-bit support is enabled)
Configuration information (lib/pkgconfig):
libpcre2-8.pc
libpcre2-16.pc
libpcre2-32.pc
libpcre2-posix.pc
Header files (include):
pcre2.h
pcre2posix.h
Man pages (share/man/man{1,3}):
pcre2grep.1
pcre2test.1
pcre2-config.1
pcre2.3
pcre2*.3 (lots more pages, all starting "pcre2")
HTML documentation (share/doc/pcre2/html):
index.html
*.html (lots more pages, hyperlinked from index.html)
Text file documentation (share/doc/pcre2):
AUTHORS
COPYING
ChangeLog
LICENCE
NEWS
README
SECURITY
pcre2.txt (a concatenation of the man(3) pages)
pcre2test.txt the pcre2test man page
pcre2grep.txt the pcre2grep man page
pcre2-config.txt the pcre2-config man page
If you want to remove PCRE2 from your system, you can run "make uninstall".
This removes all the files that "make install" installed. However, it does not
remove any directories, because these are often shared with other programs.
Retrieving configuration information
------------------------------------
Running "make install" installs the command pcre2-config, which can be used to
recall information about the PCRE2 configuration and installation. For example:
pcre2-config --version
prints the version number, and
pcre2-config --libs8
outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information
about installed libraries. Instead of separate commands for each library, a
single command is used. For example:
pkg-config --libs libpcre2-16
The data is held in *.pc files that are installed in a directory called
<prefix>/lib/pkgconfig.
Shared libraries
----------------
The default distribution builds PCRE2 as shared libraries and static libraries,
as long as the operating system supports shared libraries. Shared library
support relies on the "libtool" script which is built as part of the
"configure" process.
The libtool script is used to compile and link both shared and static
libraries. They are placed in a subdirectory called .libs when they are newly
built. The programs pcre2test and pcre2grep are built to use these uninstalled
libraries (by means of wrapper scripts in the case of shared libraries). When
you use "make install" to install shared libraries, pcre2grep and pcre2test are
automatically re-built to use the newly installed shared libraries before being
installed themselves. However, the versions left in the build directory still
use the uninstalled libraries.
To build PCRE2 using static libraries only you must use --disable-shared when
configuring it. For example:
./configure --prefix=/usr/gnu --disable-shared
Then run "make" in the usual way. Similarly, you can use --disable-static to
build only shared libraries. Note, however, that when you build only static
libraries, binary programs such as pcre2test and pcre2grep may still be
dynamically linked with other libraries (for example, libc) unless you set
LDFLAGS to --static when running "configure".
Cross-compiling using autotools
-------------------------------
You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE2 for some other host. However, you should NOT
specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
source file is compiled and run on the local host, in order to generate the
inbuilt character tables (the pcre2_chartables.c file). This will probably not
work, because pcre2_dftables.c needs to be compiled with the local compiler,
not the cross compiler.
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
created by making a copy of pcre2_chartables.c.dist, which is a default set of
tables that assumes ASCII code. Cross-compiling with the default tables should
not be a problem.
If you need to modify the character tables when cross-compiling, you should
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
hand and run it on the local host to make a new version of
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
at build time" for more details.
Making new tarballs
-------------------
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.
If you have modified any of the man page sources in the doc directory, you
should first run the maint/PrepareRelease script before making a distribution.
This script creates the .txt and HTML forms of the documentation from the man
pages.
Testing PCRE2
-------------
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
There is another script called RunGrepTest that tests the pcre2grep command.
When the 8-bit library is built, a test program for the POSIX wrapper, called
pcre2posix_test, is compiled, and when JIT support is enabled, a test program
called pcre2_jit_test is built. The scripts and the test programs are all run
when you run "make check". For other environments, see the instructions in
NON-AUTOTOOLS-BUILD.
The RunTest script runs the pcre2test test program (which is documented in its
own man page) on each of the relevant testinput files in the testdata
directory, and compares the output with the contents of the corresponding
testoutput files. RunTest uses a file called testtry to hold the main output
from pcre2test. Other files whose names begin with "test" are used as working
files in some tests.
Some tests are relevant only when certain build-time options were selected. For
example, the tests for UTF-8/16/32 features are run only when Unicode support
is available. RunTest outputs a comment when it skips a test.
Many (but not all) of the tests that are not skipped are run twice if JIT
support is available. On the second run, JIT compilation is forced. This
testing can be suppressed by putting "-nojit" on the RunTest command line.
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
libraries that are enabled. If you want to run just one set of tests, call
RunTest with either the -8, -16 or -32 option.
If valgrind is installed, you can run the tests under it by putting "-valgrind"
on the RunTest command line. To run pcre2test on just one or more specific test
files, give their numbers as arguments to RunTest, for example:
RunTest 2 7 11
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
end), or a number preceded by ~ to exclude a test. For example:
RunTest 3-15 ~10
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
except test 13. Whatever order the arguments are in, the tests are always run
in numerical order.
You can also call RunTest with the single argument "list" to cause it to output
a list of tests.
The test sequence starts with "test 0", which is a special test that has no
input file, and whose output is not checked. This is because it will be
different on different hardware and with different configurations. The test
exists in order to exercise some of pcre2test's code that would not otherwise
be run.
Tests 1 and 2 can always be run, as they expect only plain text strings (not
UTF) and make no use of Unicode properties. The first test file can be fed
directly into the perltest.sh script to check that Perl gives the same results.
The only difference you should see is in the first few lines, where the Perl
version is given instead of the PCRE2 version. The second set of tests checks
auxiliary functions, error detection, and run-time flags that are specific to
PCRE2. It also uses the debugging flags to check some of the internals of
pcre2_compile().
If you build PCRE2 with a locale setting that is not the standard C locale, the
character tables may be different (see next paragraph). In some cases, this may
cause failures in the second set of tests. For example, in a locale where the
isprint() function yields TRUE for characters in the range 128-255, the use of
[:isascii:] inside a character class defines a different set of characters, and
this shows up in this test as a difference in the compiled code, which is being
listed for checking. For example, where the comparison test output contains
[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other
cases. This is not a bug in PCRE2.
Test 3 checks pcre2_maketables(), the facility for building a set of character
tables for a specific locale and using them instead of the default tables. The
script uses the "locale" command to check for the availability of the "fr_FR",
"french", or "fr" locale, and uses the first one that it finds. If the "locale"
command fails, or if its output doesn't include "fr_FR", "french", or "fr" in
the list of available locales, the third test cannot be run, and a comment is
output to say why. If running this test produces an error like this:
** Failed to set locale "fr_FR"
it means that the given locale is not available on your system, despite being
listed by "locale". This does not mean that PCRE2 is broken. There are three
alternative output files for the third test, because three different versions
of the French locale have been encountered. The test passes if its output
matches any one of them.
Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible
with the perltest.sh script, and test 5 checking PCRE2-specific things.
Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in
non-UTF mode and UTF mode with Unicode property support, respectively.
Test 8 checks some internal offsets and code size features, but it is run only
when Unicode support is enabled. The output is different in 8-bit, 16-bit, and
32-bit modes and for different link sizes, so there are different output files
for each mode and link size.
Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in
16-bit and 32-bit modes. These are tests that generate different output in
8-bit mode. Each pair is for general cases and Unicode support, respectively.
Test 13 checks the handling of non-UTF characters greater than 255 by
pcre2_dfa_match() in 16-bit and 32-bit modes.
Test 14 contains some special UTF and UCP tests that give different output for
different code unit widths.
Test 15 contains a number of tests that must not be run with JIT. They check,
among other non-JIT things, the match-limiting features of the interpretive
matcher.
Test 16 is run only when JIT support is not available. It checks that an
attempt to use JIT has the expected behaviour.
Test 17 is run only when JIT support is available. It checks JIT complete and
partial modes, match-limiting under JIT, and other JIT-specific features.
Tests 18 and 19 are run only in 8-bit mode. They check the POSIX interface to
the 8-bit library, without and with Unicode support, respectively.
Test 20 checks the serialization functions by writing a set of compiled
patterns to a file, and then reloading and checking them.
Tests 21 and 22 test \C support when the use of \C is not locked out, without
and with UTF support, respectively. Test 23 tests \C when it is locked out.
Tests 24 and 25 test the experimental pattern conversion functions, without and
with UTF support, respectively.
Test 26 checks Unicode property support using tests that are generated
automatically from the Unicode data tables.
Character tables
----------------
For speed, PCRE2 uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, a set of tables that is
built into the library is used. The pcre2_maketables() function can be called
by an application to create a new set of tables in the current locale. These are
passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a
compile context.
The source file called pcre2_chartables.c contains the default set of tables.
By default, this is created as a copy of pcre2_chartables.c.dist, which
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
specified for ./configure, a new version of pcre2_chartables.c is built by the
program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
character handling functions such as isalnum(), isalpha(), isupper(),
islower(), etc. to build the table sources. This means that the default C
locale that is set for your system will control the contents of these default
tables. You can change the default tables by editing pcre2_chartables.c and
then re-building PCRE2. If you do this, you should take care to ensure that the
file does not get automatically re-generated. The best way to do this is to
move pcre2_chartables.c.dist out of the way and replace it with your customized
tables.
When the pcre2_dftables program is run as a result of specifying
--enable-rebuild-chartables, it uses the default C locale that is set on your
system. It does not pay attention to the LC_xxx environment variables. In other
words, it uses the system's default locale rather than whatever the compiling
user happens to have set. If you really do want to build a source set of
character tables in a locale that is specified by the LC_xxx variables, you can
run the pcre2_dftables program by hand with the -L option. For example:
./pcre2_dftables -L pcre2_chartables.c.special
The second argument names the file where the source code for the tables is
written. The first two 256-byte tables provide lower casing and case flipping
functions, respectively. The next table consists of a number of 32-byte bit
maps which identify certain character classes such as digits, "word"
characters, white space, etc. These are used when building 32-byte bit maps
that represent character classes for code points less than 256. The final
256-byte table has bits indicating various character types, as follows:
1 white space character
2 letter
4 lower case letter
8 decimal digit
16 alphanumeric or '_'
You can also specify -b (with or without -L) when running pcre2_dftables. This
causes the tables to be written in binary instead of as source code. A set of
binary tables can be loaded into memory by an application and passed to
pcre2_compile() in the same way as tables created dynamically by calling
pcre2_maketables(). The tables are just a string of bytes, independent of
hardware characteristics such as endianness. This means they can be bundled
with an application that runs in different environments, to ensure consistent
behaviour.
See also the pcre2build section "Creating character tables at build time".
File manifest
-------------
The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
when --enable-rebuild-chartables is specified
src/pcre2_chartables.c.dist a default set of character tables that assume
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_chkdint.c )
src/pcre2_compile.c )
src/pcre2_compile_class.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_ucptables.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2_printint.c debugging function that is used by pcre2test
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
src/config.h.in template for config.h, when built by "configure"
src/pcre2.h.in template for pcre2.h when built by "configure"
src/pcre2posix.h header for the external POSIX wrapper API
src/pcre2_compile.h header for internal use
src/pcre2_internal.h header for internal use
src/pcre2_intmodedep.h a mode-specific internal header
src/pcre2_jit_char_inc.h header used by JIT
src/pcre2_jit_neon_inc.h header used by JIT
src/pcre2_jit_simd_inc.h header used by JIT
src/pcre2_ucp.h header for Unicode property handling
src/pcre2_util.h header for internal utils
deps/sljit/sljit_src/* source files for the JIT compiler
(B) Source files for programs that use PCRE2:
src/pcre2demo.c simple demonstration of coding calls to PCRE2
src/pcre2grep.c source of a grep utility that uses PCRE2
src/pcre2test.c comprehensive test program
src/pcre2_jit_test.c JIT test program
src/pcre2posix_test.c POSIX wrapper API test program
(C) Auxiliary files:
AUTHORS.md information about the authors of PCRE2
ChangeLog log of changes to the code
HACKING some notes about the internals of PCRE2
INSTALL generic installation instructions
LICENCE.md conditions for the use of PCRE2
COPYING the same, using GNU's standard name
SECURITY.md information on reporting vulnerabilities
Makefile.in ) template for Unix Makefile, which is built by
) "configure"
Makefile.am ) the automake input that was used to create
) Makefile.in
NEWS important changes in this release
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
README this file
RunTest a Unix shell script for running tests
RunGrepTest a Unix shell script for pcre2grep tests
RunTest.bat a Windows batch file for running tests
RunGrepTest.bat a Windows batch file for pcre2grep tests
aclocal.m4 m4 macros (generated by "aclocal")
m4/* m4 macros (used by autoconf)
configure a configuring shell script (built by autoconf)
configure.ac ) the autoconf input that was used to build
) "configure" and config.h
doc/*.3 man page sources for PCRE2
doc/*.1 man page sources for pcre2grep and pcre2test
doc/html/* HTML documentation
doc/pcre2.txt plain text version of the man pages
doc/pcre2-config.txt plain text documentation of pcre2-config script
doc/pcre2grep.txt plain text documentation of grep utility program
doc/pcre2test.txt plain text documentation of test program
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
ar-lib )
config.guess )
config.sub )
depcomp ) helper tools generated by libtool and
compile ) automake, used internally by ./configure
install-sh )
ltmain.sh )
missing )
test-driver )
perltest.sh Script for running a Perl test program
pcre2-config.in source of script which retains PCRE2 information
testdata/testinput* test data for main library tests
testdata/testoutput* expected test results
testdata/grep* input and output for pcre2grep tests
testdata/* other supporting test files
(D) Auxiliary files for CMake support
cmake/COPYING-CMAKE-SCRIPTS
cmake/FindEditline.cmake
cmake/FindReadline.cmake
cmake/pcre2-config-version.cmake.in
cmake/pcre2-config.cmake.in
CMakeLists.txt
config-cmake.h.in
(E) Auxiliary files for building PCRE2 "by hand"
src/pcre2.h.generic ) a version of the public PCRE2 header file
) for use in non-"configure" environments
src/config.h.generic ) a version of config.h for use in non-"configure"
) environments
(F) Auxiliary files for building PCRE2 using other build systems
BUILD.bazel )
MODULE.bazel ) files used by the Bazel build system
WORKSPACE.bazel )
build.zig file used by zig's build system
(G) Auxiliary files for building PCRE2 under OpenVMS
vms/configure.com )
vms/openvms_readme.txt ) These files were contributed by a PCRE2 user.
vms/pcre2.h_patch )
vms/stdint.h )
==============================
Last updated: 18 December 2024
==============================

View File

@@ -0,0 +1,327 @@
<html>
<!-- This is a manually maintained file that is the root of the HTML version of
the PCRE2 documentation. When the HTML documents are built from the man
page versions, the entire doc/html directory is emptied, this file is then
copied into doc/html/index.html, and the remaining files therein are
created by the 132html script.
-->
<head>
<title>PCRE2 specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>Perl-compatible Regular Expressions (revised API: PCRE2)</h1>
<p>
The HTML documentation for PCRE2 consists of a number of pages that are listed
below in alphabetical order. If you are new to PCRE2, please read the first one
first.
</p>
<table>
<tr><td><a href="pcre2.html">pcre2</a></td>
<td>&nbsp;&nbsp;Introductory page</td></tr>
<tr><td><a href="pcre2-config.html">pcre2-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
<tr><td><a href="pcre2api.html">pcre2api</a></td>
<td>&nbsp;&nbsp;PCRE2's native API</td></tr>
<tr><td><a href="pcre2build.html">pcre2build</a></td>
<td>&nbsp;&nbsp;Building PCRE2</td></tr>
<tr><td><a href="pcre2callout.html">pcre2callout</a></td>
<td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
<td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
<tr><td><a href="pcre2convert.html">pcre2convert</a></td>
<td>&nbsp;&nbsp;Experimental foreign pattern conversion functions</td></tr>
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
<td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
<tr><td><a href="pcre2grep.html">pcre2grep</a></td>
<td>&nbsp;&nbsp;The <b>pcre2grep</b> command</td></tr>
<tr><td><a href="pcre2jit.html">pcre2jit</a></td>
<td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
<tr><td><a href="pcre2limits.html">pcre2limits</a></td>
<td>&nbsp;&nbsp;Details of size and other limits</td></tr>
<tr><td><a href="pcre2matching.html">pcre2matching</a></td>
<td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
<tr><td><a href="pcre2partial.html">pcre2partial</a></td>
<td>&nbsp;&nbsp;Using PCRE2 for partial matching</td></tr>
<tr><td><a href="pcre2pattern.html">pcre2pattern</a></td>
<td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE2</td></tr>
<tr><td><a href="pcre2perform.html">pcre2perform</a></td>
<td>&nbsp;&nbsp;Some comments on performance</td></tr>
<tr><td><a href="pcre2posix.html">pcre2posix</a></td>
<td>&nbsp;&nbsp;The POSIX API to the PCRE2 8-bit library</td></tr>
<tr><td><a href="pcre2sample.html">pcre2sample</a></td>
<td>&nbsp;&nbsp;Discussion of the pcre2demo program</td></tr>
<tr><td><a href="pcre2serialize.html">pcre2serialize</a></td>
<td>&nbsp;&nbsp;Serializing functions for saving precompiled patterns</td></tr>
<tr><td><a href="pcre2syntax.html">pcre2syntax</a></td>
<td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>
<tr><td><a href="pcre2test.html">pcre2test</a></td>
<td>&nbsp;&nbsp;The <b>pcre2test</b> command for testing PCRE2</td></tr>
<tr><td><a href="pcre2unicode.html">pcre2unicode</a></td>
<td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
</table>
<p>
There are also individual pages that summarize the interface for each function
in the library.
</p>
<table>
<tr><td><a href="pcre2_callout_enumerate.html">pcre2_callout_enumerate</a></td>
<td>&nbsp;&nbsp;Enumerate callouts in a compiled pattern</td></tr>
<tr><td><a href="pcre2_code_copy.html">pcre2_code_copy</a></td>
<td>&nbsp;&nbsp;Copy a compiled pattern</td></tr>
<tr><td><a href="pcre2_code_copy_with_tables.html">pcre2_code_copy_with_tables</a></td>
<td>&nbsp;&nbsp;Copy a compiled pattern and its character tables</td></tr>
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
<td>&nbsp;&nbsp;Free a compiled pattern</td></tr>
<tr><td><a href="pcre2_compile.html">pcre2_compile</a></td>
<td>&nbsp;&nbsp;Compile a regular expression pattern</td></tr>
<tr><td><a href="pcre2_compile_context_copy.html">pcre2_compile_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a compile context</td></tr>
<tr><td><a href="pcre2_compile_context_create.html">pcre2_compile_context_create</a></td>
<td>&nbsp;&nbsp;Create a compile context</td></tr>
<tr><td><a href="pcre2_compile_context_free.html">pcre2_compile_context_free</a></td>
<td>&nbsp;&nbsp;Free a compile context</td></tr>
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
<td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
<tr><td><a href="pcre2_convert_context_copy.html">pcre2_convert_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a convert context</td></tr>
<tr><td><a href="pcre2_convert_context_create.html">pcre2_convert_context_create</a></td>
<td>&nbsp;&nbsp;Create a convert context</td></tr>
<tr><td><a href="pcre2_convert_context_free.html">pcre2_convert_context_free</a></td>
<td>&nbsp;&nbsp;Free a convert context</td></tr>
<tr><td><a href="pcre2_converted_pattern_free.html">pcre2_converted_pattern_free</a></td>
<td>&nbsp;&nbsp;Free converted foreign pattern</td></tr>
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
<tr><td><a href="pcre2_general_context_copy.html">pcre2_general_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a general context</td></tr>
<tr><td><a href="pcre2_general_context_create.html">pcre2_general_context_create</a></td>
<td>&nbsp;&nbsp;Create a general context</td></tr>
<tr><td><a href="pcre2_general_context_free.html">pcre2_general_context_free</a></td>
<td>&nbsp;&nbsp;Free a general context</td></tr>
<tr><td><a href="pcre2_get_error_message.html">pcre2_get_error_message</a></td>
<td>&nbsp;&nbsp;Get textual error message for error number</td></tr>
<tr><td><a href="pcre2_get_mark.html">pcre2_get_mark</a></td>
<td>&nbsp;&nbsp;Get a (*MARK) name</td></tr>
<tr><td><a href="pcre2_get_match_data_size.html">pcre2_get_match_data_size</a></td>
<td>&nbsp;&nbsp;Get the size of a match data block</td></tr>
<tr><td><a href="pcre2_get_ovector_count.html">pcre2_get_ovector_count</a></td>
<td>&nbsp;&nbsp;Get the ovector count</td></tr>
<tr><td><a href="pcre2_get_ovector_pointer.html">pcre2_get_ovector_pointer</a></td>
<td>&nbsp;&nbsp;Get a pointer to the ovector</td></tr>
<tr><td><a href="pcre2_get_startchar.html">pcre2_get_startchar</a></td>
<td>&nbsp;&nbsp;Get the starting character offset</td></tr>
<tr><td><a href="pcre2_jit_compile.html">pcre2_jit_compile</a></td>
<td>&nbsp;&nbsp;Process a compiled pattern with the JIT compiler</td></tr>
<tr><td><a href="pcre2_jit_free_unused_memory.html">pcre2_jit_free_unused_memory</a></td>
<td>&nbsp;&nbsp;Free unused JIT memory</td></tr>
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
<td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
<td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
<tr><td><a href="pcre2_jit_stack_create.html">pcre2_jit_stack_create</a></td>
<td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
<td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
<td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
<tr><td><a href="pcre2_maketables_free.html">pcre2_maketables_free</a></td>
<td>&nbsp;&nbsp;Free character tables</td></tr>
<tr><td><a href="pcre2_match.html">pcre2_match</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(Perl compatible)</td></tr>
<tr><td><a href="pcre2_match_context_copy.html">pcre2_match_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a match context</td></tr>
<tr><td><a href="pcre2_match_context_create.html">pcre2_match_context_create</a></td>
<td>&nbsp;&nbsp;Create a match context</td></tr>
<tr><td><a href="pcre2_match_context_free.html">pcre2_match_context_free</a></td>
<td>&nbsp;&nbsp;Free a match context</td></tr>
<tr><td><a href="pcre2_match_data_create.html">pcre2_match_data_create</a></td>
<td>&nbsp;&nbsp;Create a match data block</td></tr>
<tr><td><a href="pcre2_match_data_create_from_pattern.html">pcre2_match_data_create_from_pattern</a></td>
<td>&nbsp;&nbsp;Create a match data block getting size from pattern</td></tr>
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
<td>&nbsp;&nbsp;Free a match data block</td></tr>
<tr><td><a href="pcre2_pattern_convert.html">pcre2_pattern_convert</a></td>
<td>&nbsp;&nbsp;Experimental foreign pattern converter</td></tr>
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
<td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
<tr><td><a href="pcre2_serialize_decode.html">pcre2_serialize_decode</a></td>
<td>&nbsp;&nbsp;Decode serialized compiled patterns</td></tr>
<tr><td><a href="pcre2_serialize_encode.html">pcre2_serialize_encode</a></td>
<td>&nbsp;&nbsp;Serialize compiled patterns for save/restore</td></tr>
<tr><td><a href="pcre2_serialize_free.html">pcre2_serialize_free</a></td>
<td>&nbsp;&nbsp;Free serialized compiled patterns</td></tr>
<tr><td><a href="pcre2_serialize_get_number_of_codes.html">pcre2_serialize_get_number_of_codes</a></td>
<td>&nbsp;&nbsp;Get number of serialized compiled patterns</td></tr>
<tr><td><a href="pcre2_set_bsr.html">pcre2_set_bsr</a></td>
<td>&nbsp;&nbsp;Set \R convention</td></tr>
<tr><td><a href="pcre2_set_callout.html">pcre2_set_callout</a></td>
<td>&nbsp;&nbsp;Set up a callout function</td></tr>
<tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
<td>&nbsp;&nbsp;Set character tables</td></tr>
<tr><td><a href="pcre2_set_compile_extra_options.html">pcre2_set_compile_extra_options</a></td>
<td>&nbsp;&nbsp;Set compile time extra options</td></tr>
<tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
<td>&nbsp;&nbsp;Set up a compile recursion guard function</td></tr>
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr>
<tr><td><a href="pcre2_set_glob_escape.html">pcre2_set_glob_escape</a></td>
<td>&nbsp;&nbsp;Set glob escape character</td></tr>
<tr><td><a href="pcre2_set_glob_separator.html">pcre2_set_glob_separator</a></td>
<td>&nbsp;&nbsp;Set glob separator character</td></tr>
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking heap limit</td></tr>
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
<td>&nbsp;&nbsp;Set the match limit</td></tr>
<tr><td><a href="pcre2_set_max_pattern_compiled_length.html">pcre2_set_max_pattern_compiled_length</a></td>
<td>&nbsp;&nbsp;Set the maximum length of a compiled pattern</td></tr>
<tr><td><a href="pcre2_set_max_pattern_length.html">pcre2_set_max_pattern_length</a></td>
<td>&nbsp;&nbsp;Set the maximum length of a pattern</td></tr>
<tr><td><a href="pcre2_set_max_varlookbehind.html">pcre2_set_max_varlookbehind</a></td>
<td>&nbsp;&nbsp;Set the maximum match length for a variable-length lookbehind</td></tr>
<tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
<td>&nbsp;&nbsp;Set the newline convention</td></tr>
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
<td>&nbsp;&nbsp;Set the offset limit</td></tr>
<tr><td><a href="pcre2_set_optimize.html">pcre2_set_optimize</a></td>
<td>&nbsp;&nbsp;Set an optimization directive</td></tr>
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
<td>&nbsp;&nbsp;Set the parentheses nesting limit</td></tr>
<tr><td><a href="pcre2_set_recursion_limit.html">pcre2_set_recursion_limit</a></td>
<td>&nbsp;&nbsp;Obsolete: use pcre2_set_depth_limit</td></tr>
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
<td>&nbsp;&nbsp;Obsolete function that (from 10.30 onwards) does nothing</td></tr>
<tr><td><a href="pcre2_set_substitute_callout.html">pcre2_set_substitute_callout</a></td>
<td>&nbsp;&nbsp;Set a substitution callout function</td></tr>
<tr><td><a href="pcre2_set_substitute_case_callout.html">pcre2_set_substitute_case_callout</a></td>
<td>&nbsp;&nbsp;Set a substitution case callout function</td></tr>
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string and do
substitutions</td></tr>
<tr><td><a href="pcre2_substring_copy_byname.html">pcre2_substring_copy_byname</a></td>
<td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>
<tr><td><a href="pcre2_substring_copy_bynumber.html">pcre2_substring_copy_bynumber</a></td>
<td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>
<tr><td><a href="pcre2_substring_free.html">pcre2_substring_free</a></td>
<td>&nbsp;&nbsp;Free extracted substring</td></tr>
<tr><td><a href="pcre2_substring_get_byname.html">pcre2_substring_get_byname</a></td>
<td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>
<tr><td><a href="pcre2_substring_get_bynumber.html">pcre2_substring_get_bynumber</a></td>
<td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
<tr><td><a href="pcre2_substring_length_byname.html">pcre2_substring_length_byname</a></td>
<td>&nbsp;&nbsp;Find length of named substring</td></tr>
<tr><td><a href="pcre2_substring_length_bynumber.html">pcre2_substring_length_bynumber</a></td>
<td>&nbsp;&nbsp;Find length of numbered substring</td></tr>
<tr><td><a href="pcre2_substring_list_free.html">pcre2_substring_list_free</a></td>
<td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>
<tr><td><a href="pcre2_substring_list_get.html">pcre2_substring_list_get</a></td>
<td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
<tr><td><a href="pcre2_substring_nametable_scan.html">pcre2_substring_nametable_scan</a></td>
<td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
<tr><td><a href="pcre2_substring_number_from_name.html">pcre2_substring_number_from_name</a></td>
<td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
</table>
</html>

View File

@@ -0,0 +1,102 @@
<html>
<head>
<title>pcre2-config specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2-config man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">OPTIONS</a>
<li><a name="TOC4" href="#SEC4">SEE ALSO</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
<li><a name="TOC6" href="#SEC6">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>pcre2-config [--prefix] [--exec-prefix] [--version]</b>
<b> [--libs8] [--libs16] [--libs32] [--libs-posix]</b>
<b> [--cflags] [--cflags-posix]</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
<b>pcre2-config</b> returns the configuration of the installed PCRE2 libraries
and the options required to compile a program to use them. Some of the options
apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are
not available for libraries that have not been built. If an unavailable option
is encountered, the "usage" information is output.
</P>
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
<P>
<b>--prefix</b>
Writes the directory prefix used in the PCRE2 installation for architecture
independent files (<i>/usr</i> on many systems, <i>/usr/local</i> on some
systems) to the standard output.
</P>
<P>
<b>--exec-prefix</b>
Writes the directory prefix used in the PCRE2 installation for architecture
dependent files (normally the same as <b>--prefix</b>) to the standard output.
</P>
<P>
<b>--version</b>
Writes the version number of the installed PCRE2 libraries to the standard
output.
</P>
<P>
<b>--libs8</b>
Writes to the standard output the command line options required to link
with the 8-bit PCRE2 library (<b>-lpcre2-8</b> on many systems).
</P>
<P>
<b>--libs16</b>
Writes to the standard output the command line options required to link
with the 16-bit PCRE2 library (<b>-lpcre2-16</b> on many systems).
</P>
<P>
<b>--libs32</b>
Writes to the standard output the command line options required to link
with the 32-bit PCRE2 library (<b>-lpcre2-32</b> on many systems).
</P>
<P>
<b>--libs-posix</b>
Writes to the standard output the command line options required to link with
PCRE2's POSIX API wrapper library (<b>-lpcre2-posix</b> <b>-lpcre2-8</b> on many
systems).
</P>
<P>
<b>--cflags</b>
Writes to the standard output the command line options required to compile
files that use PCRE2 (this may include some <b>-I</b> options, but is blank on
many systems).
</P>
<P>
<b>--cflags-posix</b>
Writes to the standard output the command line options required to compile
files that use PCRE2's POSIX API wrapper library (this may include some
<b>-I</b> options, but is blank on many systems).
</P>
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2(3)</b>
</P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P>
This manual page was originally written by Mark Baker for the Debian GNU/Linux
system. It has been subsequently revised as a generic PCRE2 man page.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 28 September 2014
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,214 @@
<html>
<head>
<title>pcre2 specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2 man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
<li><a name="TOC4" href="#SEC4">AUTHORS</a>
<li><a name="TOC5" href="#SEC5">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
<P>
PCRE2 is the name used for a revised API for the PCRE library, which is a set
of functions, written in C, that implement regular expression pattern matching
using the same syntax and semantics as Perl, with just a few differences. After
nearly two decades, the limitations of the original API were making development
increasingly difficult. The new API is more extensible, and it was simplified
by abolishing the separate "study" optimizing function; in PCRE2, patterns are
automatically optimized where possible. Since forking from PCRE1, the code has
been extensively refactored and new features introduced. The old library is now
obsolete and is no longer maintained.
</P>
<P>
As well as Perl-style regular expression patterns, some features that appeared
in Python and the original PCRE before they appeared in Perl are available
using the Python syntax. There is also some support for one or two .NET and
Oniguruma syntax items, and there are options for requesting some minor changes
that give better ECMAScript (aka JavaScript) compatibility.
</P>
<P>
The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit,
or 32-bit code units, which means that up to three separate libraries may be
installed, one for each code unit size. The size of code unit is not related to
the bit size of the underlying hardware. In a 64-bit environment that also
supports 32-bit applications, versions of PCRE2 that are compiled in both
64-bit and 32-bit modes may be needed.
</P>
<P>
The original work to extend PCRE to 16-bit and 32-bit code units was done by
Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
can be interpreted either as one character per code unit, or as UTF-encoded
Unicode, with support for Unicode general category properties. Unicode support
is optional at build time (but is the default). However, processing strings as
UTF code units must be enabled explicitly at run time. The version of Unicode
in use can be discovered by running
<pre>
pcre2test -C
</PRE>
</P>
<P>
The three libraries contain identical sets of functions, with names ending in
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
one code unit width can be written using generic names such as
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
the case.
</P>
<P>
In addition to the Perl-compatible matching function, PCRE2 contains an
alternative function that matches the same compiled patterns in a different
way. In certain circumstances, the alternative function has some advantages.
For a discussion of the two matching algorithms, see the
<a href="pcre2matching.html"><b>pcre2matching</b></a>
page.
</P>
<P>
Details of exactly which Perl regular expression features are and are not
supported by PCRE2 are given in separate documents. See the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
and
<a href="pcre2compat.html"><b>pcre2compat</b></a>
pages. There is a syntax summary in the
<a href="pcre2syntax.html"><b>pcre2syntax</b></a>
page.
</P>
<P>
Some features of PCRE2 can be included, excluded, or changed when the library
is built. The
<a href="pcre2_config.html"><b>pcre2_config()</b></a>
function makes it possible for a client to discover which features are
available. The features themselves are described in the
<a href="pcre2build.html"><b>pcre2build</b></a>
page. Documentation about building PCRE2 for various operating systems can be
found in the
<a href="README.txt"><b>README</b></a>
and
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b></a>
files in the source distribution.
</P>
<P>
The libraries contain a number of undocumented internal functions and data
tables that are used by more than one of the exported external functions, but
which are not intended for use by external callers. Their names all begin with
"_pcre2", which hopefully will not provoke any name clashes. In some
environments, it is possible to control which external symbols are exported
when a shared library is built, and in these cases the undocumented symbols are
not exported.
</P>
<br><a name="SEC2" href="#TOC1">SECURITY CONSIDERATIONS</a><br>
<P>
If you are using PCRE2 in a non-UTF application that permits users to supply
arbitrary patterns for compilation, you should be aware of a feature that
allows users to turn on UTF support from within a pattern. For example, an
8-bit pattern that begins with "(*UTF)" turns on UTF-8 mode, which interprets
patterns and subjects as strings of UTF-8 code units instead of individual
8-bit characters. This causes both the pattern and any data against which it is
matched to be checked for UTF-8 validity. If the data string is very long, such
a check might use enough resources to noticeably degrade your application's
performance.
</P>
<P>
One way of guarding against this possibility is to use the
<b>pcre2_pattern_info()</b> function to check the compiled pattern's options for
PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when calling
<b>pcre2_compile()</b>. This causes a compile time error if the pattern contains
a UTF-setting sequence.
</P>
<P>
The use of Unicode properties for character types such as \d can also be
enabled from within the pattern, by specifying "(*UCP)". This feature can be
disallowed by setting the PCRE2_NEVER_UCP option.
</P>
<P>
If your application is one that supports UTF, be aware that validity checking
can take time. If the same data string is to be matched many times, you can use
the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid
running redundant checks.
</P>
<P>
The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to
problems, because it may leave the current matching point in the middle of a
multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an
application to lock out the use of \C, causing a compile-time error if it is
encountered. It is also possible to build PCRE2 with the use of \C permanently
disabled.
</P>
<P>
Another way that performance can be hit is by running a pattern that has a very
large search tree against a string that will never match. Nested unlimited
repeats in a pattern are a common example. PCRE2 provides some protection
against this: see the <b>pcre2_set_match_limit()</b> function in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page. There is a similar function called <b>pcre2_set_depth_limit()</b> that can
be used to restrict the amount of memory that is used.
</P>
<br><a name="SEC3" href="#TOC1">USER DOCUMENTATION</a><br>
<P>
The user documentation for PCRE2 comprises a number of different sections. In
the "man" format, each of these is a separate "man page". In the HTML format,
each is a separate page, linked from the index page. In the plain text format,
the descriptions of the <b>pcre2grep</b> and <b>pcre2test</b> programs are in
files called <b>pcre2grep.txt</b> and <b>pcre2test.txt</b>, respectively. The
remaining sections, except for the <b>pcre2demo</b> section (which is a program
listing), and the short pages for individual functions, are concatenated in
<b>pcre2.txt</b>, for ease of searching. The sections are as follows:
<pre>
pcre2 this document
pcre2-config show PCRE2 installation configuration information
pcre2api details of PCRE2's native C API
pcre2build building PCRE2
pcre2callout details of the pattern callout feature
pcre2compat discussion of Perl compatibility
pcre2convert details of pattern conversion functions
pcre2demo a demonstration C program that uses PCRE2
pcre2grep description of the <b>pcre2grep</b> command (8-bit only)
pcre2jit discussion of just-in-time optimization support
pcre2limits details of size and other limits
pcre2matching discussion of the two matching algorithms
pcre2partial details of the partial matching facility
pcre2pattern syntax and semantics of supported regular expression patterns
pcre2perform discussion of performance issues
pcre2posix the POSIX-compatible C API for the 8-bit library
pcre2sample discussion of the pcre2demo program
pcre2serialize details of pattern serialization
pcre2syntax quick syntax reference
pcre2test description of the <b>pcre2test</b> command
pcre2unicode discussion of Unicode and UTF support
</pre>
In the "man" and HTML formats, there is also a short page for each C library
function, listing its arguments and results.
</P>
<br><a name="SEC4" href="#TOC1">AUTHORS</a><br>
<P>
The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg.
</P>
<P>
PCRE2 was written by Philip Hazel, of the University Computing Service,
Cambridge, England. Many others have also contributed.
</P>
<P>
To contact the maintainers, please use the GitHub issue tracker or the PCRE2
mailing list, as described at the project page:
<a href="https://github.com/PCRE2Project/pcre2">https://github.com/PCRE2Project/pcre2</a>
</P>
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
<P>
Last updated: 18 December 2024
<br>
Copyright &copy; 1997-2021 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,63 @@
<html>
<head>
<title>pcre2_callout_enumerate specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_callout_enumerate man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
<b> void *<i>callout_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function scans a compiled regular expression and calls the <i>callback()</i>
function for each callout within the pattern. The yield of the function is zero
for success and non-zero otherwise. The arguments are:
<pre>
<i>code</i> Points to the compiled pattern
<i>callback</i> The callback function
<i>callout_data</i> User data that is passed to the callback
</pre>
The <i>callback()</i> function is passed a pointer to a data block containing
the following fields (not necessarily in this order):
<pre>
uint32_t <i>version</i> Block version number
uint32_t <i>callout_number</i> Number for numbered callouts
PCRE2_SIZE <i>pattern_position</i> Offset to next item in pattern
PCRE2_SIZE <i>next_item_length</i> Length of next item in pattern
PCRE2_SIZE <i>callout_string_offset</i> Offset to string within pattern
PCRE2_SIZE <i>callout_string_length</i> Length of callout string
PCRE2_SPTR <i>callout_string</i> Points to callout string or is NULL
</pre>
The second argument passed to the <b>callback()</b> function is the callout data
that was passed to <b>pcre2_callout_enumerate()</b>. The <b>callback()</b>
function must return zero for success. Any other value causes the pattern scan
to stop, with the value being passed back as the result of
<b>pcre2_callout_enumerate()</b>.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_code_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_code_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_code *pcre2_code_copy(const pcre2_code *<i>code</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes a copy of the memory used for a compiled pattern, excluding
any memory used by the JIT compiler. Without a subsequent call to
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching. The
pointer to the character tables is copied, not the tables themselves (see
<b>pcre2_code_copy_with_tables()</b>). The yield of the function is NULL if
<i>code</i> is NULL or if sufficient memory cannot be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,44 @@
<html>
<head>
<title>pcre2_code_copy_with_tables specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_code_copy_with_tables man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *<i>code</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes a copy of the memory used for a compiled pattern, excluding
any memory used by the JIT compiler. Without a subsequent call to
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching.
Unlike <b>pcre2_code_copy()</b>, a separate copy of the character tables is also
made, with the new code pointing to it. This memory will be automatically freed
when <b>pcre2_code_free()</b> is called. The yield of the function is NULL if
<i>code</i> is NULL or if sufficient memory cannot be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_code_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_code_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
If <i>code</i> is NULL, this function does nothing. Otherwise, <i>code</i> must
point to a compiled pattern. This function frees its memory, including any
memory used by the JIT compiler. If the compiled pattern was created by a call
to <b>pcre2_code_copy_with_tables()</b>, the memory for the character tables is
also freed.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,120 @@
<html>
<head>
<title>pcre2_compile specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_compile man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset</i>,</b>
<b> pcre2_compile_context *<i>ccontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function compiles a regular expression pattern into an internal form. Its
arguments are:
<pre>
<i>pattern</i> A string containing the expression to be compiled
<i>length</i> The length of the string or PCRE2_ZERO_TERMINATED
<i>options</i> Primary option bits
<i>errorcode</i> Where to put an error code
<i>erroroffset</i> Where to put an error offset
<i>ccontext</i> Pointer to a compile context or NULL
</pre>
The length of the pattern and any error offset that is returned are in code
units, not characters. A NULL pattern with zero length is treated as an empty
string. A compile context is needed only if you want to provide custom memory
allocation functions, or to provide an external function for system stack size
checking (see <b>pcre2_set_compile_recursion_guard()</b>), or to change one or
more of these parameters:
<pre>
What \R matches (Unicode newlines, or CR, LF, CRLF only);
PCRE2's character tables;
The newline character sequence;
The compile time nested parentheses limit;
The maximum pattern length (in code units) that is allowed;
The additional option bits.
</pre>
The primary option bits are:
<pre>
PCRE2_ANCHORED Force pattern anchoring
PCRE2_ALLOW_EMPTY_CLASS Allow empty classes
PCRE2_ALT_BSUX Alternative handling of \u, \U, and \x
PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode
PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax
PCRE2_ALT_VERBNAMES Process backslashes in verb names
PCRE2_AUTO_CALLOUT Compile automatic callouts
PCRE2_CASELESS Do caseless matching
PCRE2_DOLLAR_ENDONLY $ not to match newline at end
PCRE2_DOTALL . matches anything including NL
PCRE2_DUPNAMES Allow duplicate names for subpatterns
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_EXTENDED Ignore white space and # comments
PCRE2_FIRSTLINE Force matching to be before newline
PCRE2_LITERAL Pattern characters are all literal
PCRE2_MATCH_INVALID_UTF Enable support for matching invalid UTF
PCRE2_MATCH_UNSET_BACKREF Match unset backreferences
PCRE2_MULTILINE ^ and $ match newlines within data
PCRE2_NEVER_BACKSLASH_C Lock out the use of \C in patterns
PCRE2_NEVER_UCP Lock out PCRE2_UCP, e.g. via (*UCP)
PCRE2_NEVER_UTF Lock out PCRE2_UTF, e.g. via (*UTF)
PCRE2_NO_AUTO_CAPTURE Disable numbered capturing paren-
theses (named ones available)
PCRE2_NO_AUTO_POSSESS Disable auto-possessification
PCRE2_NO_DOTSTAR_ANCHOR Disable automatic anchoring for .*
PCRE2_NO_START_OPTIMIZE Disable match-time start optimizations
PCRE2_NO_UTF_CHECK Do not check the pattern for UTF validity
(only relevant if PCRE2_UTF is set)
PCRE2_UCP Use Unicode properties for \d, \w, etc.
PCRE2_UNGREEDY Invert greediness of quantifiers
PCRE2_USE_OFFSET_LIMIT Enable offset limit for unanchored matching
PCRE2_UTF Treat pattern and subjects as UTF strings
</pre>
PCRE2 must be built with Unicode support (the default) in order to use
PCRE2_UTF, PCRE2_UCP and related options.
</P>
<P>
Additional options may be set in the compile context via the
<a href="pcre2_set_compile_extra_options.html"><b>pcre2_set_compile_extra_options</b></a>
function.
</P>
<P>
If either of <i>errorcode</i> or <i>erroroffset</i> is NULL, the function returns
NULL immediately. Otherwise, the yield of this function is a pointer to a
private data structure that contains the compiled pattern, or NULL if an error
was detected. In the error case, a text error message can be obtained by
passing the value returned via the <i>errorcode</i> argument to the
<b>pcre2_get_error_message()</b> function. The offset (in code units) where the
error was encountered is returned via the <i>erroroffset</i> argument.
</P>
<P>
If there is no error, the value passed via <i>errorcode</i> returns the message
"no error" if passed to <b>pcre2_get_error_message()</b>, and the value passed
via <i>erroroffset</i> is zero.
</P>
<P>
There is a complete description of the PCRE2 native API, with more detail on
each option, in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page, and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_compile_context_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_compile_context_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_compile_context *pcre2_compile_context_copy(</b>
<b> pcre2_compile_context *<i>ccontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes a new copy of a compile context, using the memory
allocation function that was used for the original context. The result is NULL
if the memory cannot be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_compile_context_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_compile_context_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_compile_context *pcre2_compile_context_create(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates and initializes a new compile context. If its argument is
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
allocation function within the general context is used. The result is NULL if
the memory could not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_compile_context_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_compile_context_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory occupied by a compile context, using the memory
freeing function from the general context with which it was created, or
<b>free()</b> if that was not set. If the argument is NULL, the function returns
immediately without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,84 @@
<html>
<head>
<title>pcre2_config specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_config man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes it possible for a client program to find out which optional
features are available in the version of the PCRE2 library it is using. The
arguments are as follows:
<pre>
<i>what</i> A code specifying what information is required
<i>where</i> Points to where to put the information
</pre>
If <i>where</i> is NULL, the function returns the amount of memory needed for
the requested information. When the information is a string, the value is in
code units; for other types of data it is in bytes.
</P>
<P>
If <i>where</i> is not NULL, for PCRE2_CONFIG_JITTARGET,
PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a
buffer that is large enough to hold the string. For all other codes it must
point to a uint32_t integer variable. The available codes are:
<pre>
PCRE2_CONFIG_BSR Indicates what \R matches by default:
PCRE2_BSR_UNICODE
PCRE2_BSR_ANYCRLF
PCRE2_CONFIG_COMPILED_WIDTHS Which of 8/16/32 support was compiled
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no)
PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler
PCRE2_CONFIG_LINKSIZE Configured internal link size (2, 3, 4)
PCRE2_CONFIG_MATCHLIMIT Default internal resource limit
PCRE2_CONFIG_NEVER_BACKSLASH_C Whether or not \C is disabled
PCRE2_CONFIG_NEWLINE Code for the default newline sequence:
PCRE2_NEWLINE_CR
PCRE2_NEWLINE_LF
PCRE2_NEWLINE_CRLF
PCRE2_NEWLINE_ANY
PCRE2_NEWLINE_ANYCRLF
PCRE2_NEWLINE_NUL
PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit
PCRE2_CONFIG_RECURSIONLIMIT Obsolete: use PCRE2_CONFIG_DEPTHLIMIT
PCRE2_CONFIG_STACKRECURSE Obsolete: always returns 0
PCRE2_CONFIG_UNICODE Availability of Unicode support (1=yes 0=no)
PCRE2_CONFIG_UNICODE_VERSION The Unicode version (a string)
PCRE2_CONFIG_VERSION The PCRE2 version (a string)
</pre>
The function yields a non-negative value on success or the negative value
PCRE2_ERROR_BADOPTION otherwise. This is also the result for the
PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is
requested, the function returns the number of code units used, including the
terminating zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_convert_context_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_convert_context_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
<b> pcre2_convert_context *<i>cvcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It makes a new copy of a convert context, using the memory allocation function
that was used for the original context. The result is NULL if the memory cannot
be obtained.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_convert_context_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_convert_context_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_convert_context *pcre2_convert_context_create(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It creates and initializes a new convert context. If its argument is
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
allocation function within the general context is used. The result is NULL if
the memory could not be obtained.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_convert_context_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_convert_context_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a convert context, using the memory
freeing function from the general context with which it was created, or
<b>free()</b> if that was not set. If the argument is NULL, the function returns
immediately without doing anything.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_converted_pattern_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_converted_pattern_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a converted pattern that was obtained by
calling <b>pcre2_pattern_convert()</b> with arguments that caused it to place
the converted pattern into newly obtained heap memory. If the argument is NULL,
the function returns immediately without doing anything.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,86 @@
<html>
<head>
<title>pcre2_dfa_match specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_dfa_match man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
<b> pcre2_match_context *<i>mcontext</i>,</b>
<b> int *<i>workspace</i>, PCRE2_SIZE <i>wscount</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression against a given subject
string, using an alternative matching algorithm that scans the subject string
just once (except when processing lookaround assertions). This function is
<i>not</i> Perl-compatible (the Perl-compatible matching function is
<b>pcre2_match()</b>). The arguments for this function are:
<pre>
<i>code</i> Points to the compiled pattern
<i>subject</i> Points to the subject string
<i>length</i> Length of the subject string
<i>startoffset</i> Offset in the subject at which to start matching
<i>options</i> Option bits
<i>match_data</i> Points to a match data block, for results
<i>mcontext</i> Points to a match context, or is NULL
<i>workspace</i> Points to a vector of ints used as working space
<i>wscount</i> Number of elements in the vector
</pre>
The size of the output vector needed to contain all the results depends on the
number of simultaneous matches, not on the number of parentheses in the
pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the match
data block is therefore not advisable when using this function.
</P>
<P>
A match context is needed only if you want to set up a callout function or
specify the heap limit, the match limit, or the recursion depth limit. The
<i>length</i> and <i>startoffset</i> values are code units, not characters. The
options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_COPY_MATCHED_SUBJECT
On success, make a private subject copy
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject is not a valid match
PCRE2_NO_UTF_CHECK Do not check the subject for UTF validity (only relevant if PCRE2_UTF
was set at compile time)
PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial match even if there is a full match
PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial match if no full matches are found
PCRE2_DFA_RESTART Restart after a partial match
PCRE2_DFA_SHORTEST Return only the shortest match
</pre>
There are restrictions on what may appear in a pattern when using this matching
function. Details are given in the
<a href="pcre2matching.html"><b>pcre2matching</b></a>
documentation. For details of partial matching, see the
<a href="pcre2partial.html"><b>pcre2partial</b></a>
page. There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_general_context_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_general_context_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_general_context *pcre2_general_context_copy(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes a new copy of a general context, using the memory
allocation functions in the context, if set, to get the necessary memory.
Otherwise <b>malloc()</b> is used. The result is NULL if the memory cannot be
obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,44 @@
<html>
<head>
<title>pcre2_general_context_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_general_context_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_general_context *pcre2_general_context_create(</b>
<b> void *(*<i>private_malloc</i>)(size_t, void *),</b>
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates and initializes a general context. The arguments define
custom memory management functions and a data value that is passed to them when
they are called. The <b>private_malloc()</b> function is used to get memory for
the context. If either of the first two arguments is NULL, the system memory
management function is used. The result is NULL if no memory could be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_general_context_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_general_context_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory occupied by a general context, using the memory
freeing function within the context, if set. If the argument is NULL, the
function returns immediately without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,51 @@
<html>
<head>
<title>pcre2_get_error_message specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_error_message man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
<b> PCRE2_SIZE <i>bufflen</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function provides a textual error message for each PCRE2 error code.
Compilation errors are positive numbers; UTF formatting errors and matching
errors are negative numbers. The arguments are:
<pre>
<i>errorcode</i> an error code (positive or negative)
<i>buffer</i> where to put the message
<i>bufflen</i> the length of the buffer (code units)
</pre>
The function returns the length of the message in code units, excluding the
trailing zero, or the negative error code PCRE2_ERROR_NOMEMORY if the buffer is
too small. In this case, the returned message is truncated (but still with a
trailing zero). If <i>errorcode</i> does not contain a recognized error code
number, the negative value PCRE2_ERROR_BADDATA is returned.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,47 @@
<html>
<head>
<title>pcre2_get_mark specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_mark man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
After a call of <b>pcre2_match()</b> that was passed the match block that is
this function's argument, this function returns a pointer to the last (*MARK),
(*PRUNE), or (*THEN) name that was encountered during the matching process. The
name is zero-terminated, and is within the compiled pattern. The length of the
name is in the preceding code unit. If no name is available, NULL is returned.
</P>
<P>
After a successful match, the name that is returned is the last one on the
matching path. After a failed match or a partial match, the last encountered
name is returned.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_get_match_data_heapframes_size specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_match_data_heapframes_size man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>PCRE2_SIZE pcre2_get_match_data_heapframes_size(</b>
<b> pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns the size, in bytes, of the heapframes data block that is
owned by its argument.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,39 @@
<html>
<head>
<title>pcre2_get_match_data_size specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_match_data_size man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns the size, in bytes, of the match data block that is its
argument.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,39 @@
<html>
<head>
<title>pcre2_get_ovector_count specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_ovector_count man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns the number of pairs of offsets in the ovector that forms
part of the given match data block.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_get_ovector_pointer specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_ovector_pointer man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns a pointer to the vector of offsets that forms part of the
given match data block. The number of pairs can be found by calling
<b>pcre2_get_ovector_count()</b>.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,44 @@
<html>
<head>
<title>pcre2_get_startchar specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_get_startchar man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
After a successful call of <b>pcre2_match()</b> that was passed the match block
that is this function's argument, this function returns the code unit offset of
the character at which the successful match started. For a non-partial match,
this can be different to the value of <i>ovector[0]</i> if the pattern contains
the \K escape sequence. After a partial match, however, this value is always
the same as <i>ovector[0]</i> because \K does not affect the result of a
partial match.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,74 @@
<html>
<head>
<title>pcre2_jit_compile specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_compile man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function requests JIT compilation, which, if the just-in-time compiler is
available, further processes a compiled pattern into machine code that executes
much faster than the <b>pcre2_match()</b> interpretive matching function. Full
details are given in the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
documentation.
</P>
<P>
The availability of JIT support can be tested by calling
<b>pcre2_jit_compile()</b> with a single option PCRE2_JIT_TEST_ALLOC (the
code argument is ignored, so a NULL value is accepted). Such a call
returns zero if JIT is available and has a working allocator. Otherwise
it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate
executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not
compiled.
</P>
<P>
Otherwise, the first argument must be a pointer that was returned by a
successful call to <b>pcre2_compile()</b>, and the second must contain one or
more of the following bits:
<pre>
PCRE2_JIT_COMPLETE compile code for full matching
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching
</pre>
There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
superseded by the <b>pcre2_compile()</b> option PCRE2_MATCH_INVALID_UTF. The old
option is deprecated and may be removed in the future.
</P>
<P>
The yield of the function when called with any of the three options above is 0
for success, or a negative error code otherwise. In particular,
PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown
bit is set in <i>options</i>. The function can also return PCRE2_ERROR_NOMEMORY
if JIT is unable to allocate executable memory for the compiler, even if it was
because of a system security restriction. In a few cases, the function may
return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_jit_free_unused_memory specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_free_unused_memory man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees unused JIT executable memory. The argument is a general
context, for custom memory management, or NULL for standard memory management.
JIT memory allocation retains some memory in order to improve future JIT
compilation speed. In low memory conditions,
<b>pcre2_jit_free_unused_memory()</b> can be used to cause this memory to be
freed.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,70 @@
<html>
<head>
<title>pcre2_jit_match specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_match man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
<b> pcre2_match_context *<i>mcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression that has been successfully
processed by the JIT compiler against a given subject string, using a matching
algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
</P>
<P>
In UTF mode, the subject string is not checked for UTF validity. Unless
PCRE2_MATCH_INVALID_UTF was set when the pattern was compiled, passing an
invalid UTF string results in undefined behaviour. Your program may crash or
loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you
should only call <b>pcre2_jit_match()</b> in UTF mode if you are sure the
subject is valid.
</P>
<P>
The arguments for <b>pcre2_jit_match()</b> are exactly the same as for
<a href="pcre2_match.html"><b>pcre2_match()</b>,</a>
except that the subject string must be specified with a length;
PCRE2_ZERO_TERMINATED is not supported.
</P>
<P>
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported
options are ignored.
</P>
<P>
The return values are the same as for <b>pcre2_match()</b> plus
PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested
that was not compiled. For details of partial matching, see the
<a href="pcre2partial.html"><b>pcre2partial</b></a>
page.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the JIT API in the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,75 @@
<html>
<head>
<title>pcre2_jit_stack_assign specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_stack_assign man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_jit_stack_assign(pcre2_match_context *<i>mcontext</i>,</b>
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function provides control over the memory used by JIT as a run-time stack
when <b>pcre2_match()</b> or <b>pcre2_jit_match()</b> is called with a pattern
that has been successfully processed by the JIT compiler. The information that
determines which stack is used is put into a match context that is subsequently
passed to a matching function. The arguments of this function are:
<pre>
mcontext a pointer to a match context
callback a callback function
callback_data a JIT stack or a value to be passed to the callback
</PRE>
</P>
<P>
If <i>mcontext</i> is NULL, the function returns immediately, without doing
anything.
</P>
<P>
If <i>callback</i> is NULL and <i>callback_data</i> is NULL, an internal 32KiB
block on the machine stack is used.
</P>
<P>
If <i>callback</i> is NULL and <i>callback_data</i> is not NULL,
<i>callback_data</i> must be a valid JIT stack, the result of calling
<b>pcre2_jit_stack_create()</b>.
</P>
<P>
If <i>callback</i> is not NULL, it is called with <i>callback_data</i> as an
argument at the start of matching, in order to set up a JIT stack. If the
result is NULL, the internal 32KiB stack is used; otherwise the return value
must be a valid JIT stack, the result of calling
<b>pcre2_jit_stack_create()</b>.
</P>
<P>
You may safely use the same JIT stack for multiple patterns, as long as they
are all matched in the same thread. In a multithread application, each thread
must use its own JIT stack. For more details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,50 @@
<html>
<head>
<title>pcre2_jit_stack_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_stack_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_jit_stack *pcre2_jit_stack_create(size_t <i>startsize</i>,</b>
<b> size_t <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is used to create a stack for use by the code compiled by the JIT
compiler. The first two arguments are a starting size for the stack, and a
maximum size to which it is allowed to grow. The final argument is a general
context, for memory allocation functions, or NULL for standard memory
allocation. The result can be passed to the JIT run-time code by calling
<b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern,
which can then be processed by <b>pcre2_match()</b> or <b>pcre2_jit_match()</b>.
A maximum stack size of 512KiB to 1MiB should be more than enough for any
pattern. If the stack couldn't be allocated or the values passed were not
reasonable, NULL will be returned. For more details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_jit_stack_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_jit_stack_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_jit_stack_free(pcre2_jit_stack *<i>jit_stack</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is used to free a JIT stack that was created by
<b>pcre2_jit_stack_create()</b> when it is no longer needed. If the argument is
NULL, the function returns immediately without doing anything. For more
details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,48 @@
<html>
<head>
<title>pcre2_maketables specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_maketables man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>const uint8_t *pcre2_maketables(pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function builds a set of character tables for character code points that
are less than 256. These can be passed to <b>pcre2_compile()</b> in a compile
context in order to override the internal, built-in tables (which were either
defaulted or made by <b>pcre2_maketables()</b> when PCRE2 was compiled). See the
<a href="pcre2_set_character_tables.html"><b>pcre2_set_character_tables()</b></a>
page. You might want to do this if you are using a non-standard locale.
</P>
<P>
If the argument is NULL, <b>malloc()</b> is used to get memory for the tables.
Otherwise it must point to a general context, which can supply pointers to a
custom memory manager. The function yields a pointer to the tables.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,44 @@
<html>
<head>
<title>pcre2_maketables_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_maketables_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_maketables_free(pcre2_general_context *<i>gcontext</i>,</b>
<b> const uint8_t *<i>tables</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function discards a set of character tables that were created by a call
to
<a href="pcre2_maketables.html"><b>pcre2_maketables()</b></a>.
</P>
<P>
The <i>gcontext</i> parameter should match what was used in that call to
account for any custom allocators that might be in use; if it is NULL
the system <b>free()</b> is used.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,87 @@
<html>
<head>
<title>pcre2_match specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
<b> pcre2_match_context *<i>mcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression against a given subject
string, using a matching algorithm that is similar to Perl's. It returns
offsets to what it has matched and to captured substrings via the
<b>match_data</b> block, which can be processed by functions with names that
start with <b>pcre2_get_ovector_...()</b> or <b>pcre2_substring_...()</b>. The
return from <b>pcre2_match()</b> is one more than the highest numbered capturing
pair that has been set (for example, 1 if there are no captures), zero if the
vector of offsets is too small, or a negative error code for no match and other
errors. The function arguments are:
<pre>
<i>code</i> Points to the compiled pattern
<i>subject</i> Points to the subject string
<i>length</i> Length of the subject string
<i>startoffset</i> Offset in the subject at which to start matching
<i>options</i> Option bits
<i>match_data</i> Points to a match data block, for results
<i>mcontext</i> Points to a match context, or is NULL
</pre>
A match context is needed only if you want to:
<pre>
Set up a callout function
Set a matching offset limit
Change the heap memory limit
Change the backtracking match limit
Change the backtracking depth limit
Set custom memory management specifically for the match
</pre>
The <i>length</i> and <i>startoffset</i> values are code units, not characters.
The length may be given as PCRE2_ZERO_TERMINATED for a subject that is
terminated by a binary zero code unit. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_COPY_MATCHED_SUBJECT
On success, make a private subject copy
PCRE2_DISABLE_RECURSELOOP_CHECK
Only useful in rare cases; use with care
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject string is not the beginning of a line
PCRE2_NOTEOL Subject string is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject is not a valid match
PCRE2_NO_JIT Do not use JIT matching
PCRE2_NO_UTF_CHECK Do not check the subject for UTF validity (only relevant if PCRE2_UTF
was set at compile time)
PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial match even if there is a full match
PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial match if no full matches are found
</pre>
For details of partial matching, see the
<a href="pcre2partial.html"><b>pcre2partial</b></a>
page. There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_match_context_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_context_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_match_context *pcre2_match_context_copy(</b>
<b> pcre2_match_context *<i>mcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes a new copy of a match context, using the memory
allocation function that was used for the original context. The result is NULL
if the memory cannot be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_match_context_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_context_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_match_context *pcre2_match_context_create(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates and initializes a new match context. If its argument is
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
allocation function within the general context is used. The result is NULL if
the memory could not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_match_context_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_context_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory occupied by a match context, using the memory
freeing function from the general context with which it was created, or
<b>free()</b> if that was not set. If the argument is NULL, the function returns
immediately without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,50 @@
<html>
<head>
<title>pcre2_match_data_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_data_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates a new match data block, which is used for holding the
result of a match. The first argument specifies the number of pairs of offsets
that are required. These form the "output vector" (ovector) within the match
data block, and are used to identify the matched string and any captured
substrings when matching with <b>pcre2_match()</b>, or a number of different
matches at the same point when used with <b>pcre2_dfa_match()</b>. There is
always one pair of offsets; if <i>ovecsize</i> is zero, it is treated as one.
</P>
<P>
The second argument points to a general context, for custom memory management,
or is NULL for system memory management. The result of the function is NULL if
the memory for the block could not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,53 @@
<html>
<head>
<title>pcre2_match_data_create_from_pattern specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_data_create_from_pattern man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_match_data *pcre2_match_data_create_from_pattern(</b>
<b> const pcre2_code *<i>code</i>, pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function creates a new match data block for holding the result of a match.
The first argument points to a compiled pattern. The number of capturing
parentheses within the pattern is used to compute the number of pairs of
offsets that are required in the match data block. These form the "output
vector" (ovector) within the match data block, and are used to identify the
matched string and any captured substrings when matching with
<b>pcre2_match()</b>. If you are using <b>pcre2_dfa_match()</b>, which uses the
output vector in a different way, you should use <b>pcre2_match_data_create()</b>
instead of this function.
</P>
<P>
The second argument points to a general context, for custom memory management,
or is NULL to use the same memory allocator as was used for the compiled
pattern. The result of the function is NULL if the memory for the block could
not be obtained.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,48 @@
<html>
<head>
<title>pcre2_match_data_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_match_data_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
If <i>match_data</i> is NULL, this function does nothing. Otherwise,
<i>match_data</i> must point to a match data block, which this function frees,
using the memory freeing function from the general context or compiled pattern
with which it was created, or <b>free()</b> if that was not set. If the match
data block was previously passed to <b>pcre2_match()</b>, it will have an
attached heapframe vector; this is also freed.
</P>
<P>
If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this
match data block, the copy of the subject that was referenced within the block
is also freed.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,70 @@
<html>
<head>
<title>pcre2_pattern_convert specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_pattern_convert man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It converts a foreign pattern (for example, a glob) into a PCRE2 regular
expression pattern. Its arguments are:
<pre>
<i>pattern</i> The foreign pattern
<i>length</i> The length of the input pattern or PCRE2_ZERO_TERMINATED
<i>options</i> Option bits
<i>buffer</i> Pointer to pointer to output buffer, or NULL
<i>blength</i> Pointer to output length field
<i>cvcontext</i> Pointer to a convert context or NULL
</pre>
The length of the converted pattern (excluding the terminating zero) is
returned via <i>blength</i>. If <i>buffer</i> is NULL, the function just returns
the output length. If <i>buffer</i> points to a NULL pointer, heap memory is
obtained for the converted pattern, using the allocator in the context if
present (or else <b>malloc()</b>), and the field pointed to by <i>buffer</i> is
updated. If <i>buffer</i> points to a non-NULL field, that must point to a
buffer whose size is in the variable pointed to by <i>blength</i>. This value is
updated.
</P>
<P>
The option bits are:
<pre>
PCRE2_CONVERT_UTF Input is UTF
PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity
PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern
PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern
PCRE2_CONVERT_GLOB ) Convert
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types
PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob
</pre>
The return value from <b>pcre2_pattern_convert()</b> is zero on success or a
non-zero PCRE2 error code.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,109 @@
<html>
<head>
<title>pcre2_pattern_info specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_pattern_info man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>,</b>
<b> void *<i>where</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns information about a compiled pattern. Its arguments are:
<pre>
<i>code</i> Pointer to a compiled regular expression pattern
<i>what</i> What information is required
<i>where</i> Where to put the information
</pre>
The recognized values for the <i>what</i> argument, and the information they
request are as follows:
<pre>
PCRE2_INFO_ALLOPTIONS Final options after compiling
PCRE2_INFO_ARGOPTIONS Options passed to <b>pcre2_compile()</b>
PCRE2_INFO_BACKREFMAX Number of highest backreference
PCRE2_INFO_BSR What \R matches:
PCRE2_BSR_UNICODE: Unicode line endings
PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only
PCRE2_INFO_CAPTURECOUNT Number of capturing subpatterns
PCRE2_INFO_DEPTHLIMIT Backtracking depth limit if set, otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_EXTRAOPTIONS Extra options that were passed in the
compile context
PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL
PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information
0 nothing set
1 first code unit is set
2 start of string or after newline
PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1
PCRE2_INFO_FRAMESIZE Size of backtracking frame
PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern
PCRE2_INFO_HEAPLIMIT Heap memory limit if set, otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
0 nothing set
1 code unit is set
PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1
PCRE2_INFO_MATCHEMPTY 1 if the pattern can match an empty string, 0 otherwise
PCRE2_INFO_MATCHLIMIT Match limit if set, otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_MAXLOOKBEHIND Length (in characters) of the longest lookbehind assertion
PCRE2_INFO_MINLENGTH Lower bound length of matching strings
PCRE2_INFO_NAMECOUNT Number of named subpatterns
PCRE2_INFO_NAMEENTRYSIZE Size of name table entries
PCRE2_INFO_NAMETABLE Pointer to name table
PCRE2_INFO_NEWLINE Code for the newline sequence:
PCRE2_NEWLINE_CR
PCRE2_NEWLINE_LF
PCRE2_NEWLINE_CRLF
PCRE2_NEWLINE_ANY
PCRE2_NEWLINE_ANYCRLF
PCRE2_NEWLINE_NUL
PCRE2_INFO_RECURSIONLIMIT Obsolete synonym for PCRE2_INFO_DEPTHLIMIT
PCRE2_INFO_SIZE Size of compiled pattern
</pre>
If <i>where</i> is NULL, the function returns the amount of memory needed for
the requested information, in bytes. Otherwise, the <i>where</i> argument must
point to an unsigned 32-bit integer (uint32_t variable), except for the
following <i>what</i> values, when it must point to a variable of the type
shown:
<pre>
PCRE2_INFO_FIRSTBITMAP const uint8_t *
PCRE2_INFO_FRAMESIZE size_t
PCRE2_INFO_JITSIZE size_t
PCRE2_INFO_NAMETABLE PCRE2_SPTR
PCRE2_INFO_SIZE size_t
</pre>
The yield of the function is zero on success or:
<pre>
PCRE2_ERROR_NULL the argument <i>code</i> is NULL
PCRE2_ERROR_BADMAGIC the "magic number" was not found
PCRE2_ERROR_BADOPTION the value of <i>what</i> is invalid
PCRE2_ERROR_BADMODE the pattern was compiled in the wrong mode
PCRE2_ERROR_UNSET the requested information is not set
</PRE>
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,65 @@
<html>
<head>
<title>pcre2_serialize_decode specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_serialize_decode man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int32_t pcre2_serialize_decode(pcre2_code **<i>codes</i>,</b>
<b> int32_t <i>number_of_codes</i>, const uint8_t *<i>bytes</i>,</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function decodes a serialized set of compiled patterns back into a list of
individual patterns. This is possible only on a host that is running the same
version of PCRE2, with the same code unit width, and the host must also have
the same endianness, pointer width and PCRE2_SIZE type. The arguments for
<b>pcre2_serialize_decode()</b> are:
<pre>
<i>codes</i> pointer to a vector in which to build the list
<i>number_of_codes</i> number of slots in the vector
<i>bytes</i> the serialized byte stream
<i>gcontext</i> pointer to a general context or NULL
</pre>
The <i>bytes</i> argument must point to a block of data that was originally
created by <b>pcre2_serialize_encode()</b>, though it may have been saved on
disc or elsewhere in the meantime. If there are more codes in the serialized
data than slots in the list, only those compiled patterns that will fit are
decoded. The yield of the function is the number of decoded patterns, or one of
the following negative error codes:
<pre>
PCRE2_ERROR_BADDATA <i>number_of_codes</i> is zero or less
PCRE2_ERROR_BADMAGIC mismatch of id bytes in <i>bytes</i>
PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE2 version
PCRE2_ERROR_NOMEMORY memory allocation failed
PCRE2_ERROR_NULL <i>codes</i> or <i>bytes</i> is NULL
</pre>
PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
on a system with different endianness.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the serialization functions in the
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,66 @@
<html>
<head>
<title>pcre2_serialize_encode specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_serialize_encode man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int32_t pcre2_serialize_encode(const pcre2_code **<i>codes</i>,</b>
<b> int32_t <i>number_of_codes</i>, uint8_t **<i>serialized_bytes</i>,</b>
<b> PCRE2_SIZE *<i>serialized_size</i>, pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function encodes a list of compiled patterns into a byte stream that can
be saved on disc or elsewhere. Note that this is not an abstract format like
Java or .NET. Conversion of the byte stream back into usable compiled patterns
can only happen on a host that is running the same version of PCRE2, with the
same code unit width, and the host must also have the same endianness, pointer
width and PCRE2_SIZE type. The arguments for <b>pcre2_serialize_encode()</b>
are:
<pre>
<i>codes</i> pointer to a vector containing the list
<i>number_of_codes</i> number of slots in the vector
<i>serialized_bytes</i> set to point to the serialized byte stream
<i>serialized_size</i> set to the number of bytes in the byte stream
<i>gcontext</i> pointer to a general context or NULL
</pre>
The context argument is used to obtain memory for the byte stream. When the
serialized data is no longer needed, it must be freed by calling
<b>pcre2_serialize_free()</b>. The yield of the function is the number of
serialized patterns, or one of the following negative error codes:
<pre>
PCRE2_ERROR_BADDATA <i>number_of_codes</i> is zero or less
PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns
PCRE2_ERROR_NOMEMORY memory allocation failed
PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables
PCRE2_ERROR_NULL an argument other than <i>gcontext</i> is NULL
</pre>
PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or
that a slot in the vector does not point to a compiled pattern.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the serialization functions in the
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_serialize_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_serialize_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_serialize_free(uint8_t *<i>bytes</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function frees the memory that was obtained by
<b>pcre2_serialize_encode()</b> to hold a serialized byte stream. The argument
must point to such a byte stream or be NULL, in which case the function returns
without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the serialization functions in the
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,49 @@
<html>
<head>
<title>pcre2_serialize_get_number_of_codes specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_serialize_get_number_of_codes man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int32_t pcre2_serialize_get_number_of_codes(const uint8_t *<i>bytes</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
The <i>bytes</i> argument must point to a serialized byte stream that was
originally created by <b>pcre2_serialize_encode()</b> (though it may have been
saved on disc or elsewhere in the meantime). The function returns the number of
serialized patterns in the byte stream, or one of the following negative error
codes:
<pre>
PCRE2_ERROR_BADMAGIC mismatch of id bytes in <i>bytes</i>
PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE2 version
PCRE2_ERROR_NULL the argument is NULL
</pre>
PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
on a system with different endianness.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the serialization functions in the
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_set_bsr specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_bsr man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the convention for processing \R within a compile context.
The second argument must be one of PCRE2_BSR_ANYCRLF or PCRE2_BSR_UNICODE. The
result is zero for success or PCRE2_ERROR_BADDATA if the second argument is
invalid.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_set_callout specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_callout man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
<b> void *<i>callout_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the callout fields in a match context (the first argument).
The second argument specifies a callout function, and the third argument is an
opaque data item that is passed to it. The result of this function is always
zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,45 @@
<html>
<head>
<title>pcre2_set_character_tables specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_character_tables man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
<b> const uint8_t *<i>tables</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets a pointer to custom character tables within a compile
context. The second argument must point to a set of PCRE2 character tables or
be NULL to request the default tables. The result is always zero. Character
tables can be created by calling <b>pcre2_maketables()</b> or by running the
<b>pcre2_dftables</b> maintenance command in binary mode (see the
<a href="pcre2build.html"><b>pcre2build</b></a>
documentation).
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,58 @@
<html>
<head>
<title>pcre2_set_compile_extra_options specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_compile_extra_options man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>extra_options</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets additional option bits for <b>pcre2_compile()</b> that are
housed in a compile context. It completely replaces all the bits. The extra
options are:
<pre>
PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK Allow \K in lookarounds
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling
PCRE2_EXTRA_ASCII_BSD \d remains ASCII in UCP mode
PCRE2_EXTRA_ASCII_BSS \s remains ASCII in UCP mode
PCRE2_EXTRA_ASCII_BSW \w remains ASCII in UCP mode
PCRE2_EXTRA_ASCII_DIGIT [:digit:] and [:xdigit:] POSIX classes remain ASCII in UCP mode
PCRE2_EXTRA_ASCII_POSIX POSIX classes remain ASCII in UCP mode
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as a literal following character
PCRE2_EXTRA_CASELESS_RESTRICT Disable mixed ASCII/non-ASCII case folding
PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n
PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines
PCRE2_EXTRA_MATCH_WORD Pattern matches "words"
PCRE2_EXTRA_NEVER_CALLOUT Disallow callouts in pattern
PCRE2_EXTRA_NO_BS0 Disallow \0 (but not \00 or \000)
PCRE2_EXTRA_PYTHON_OCTAL Use Python rules for octal
PCRE2_EXTRA_TURKISH_CASING Use Turkish I case folding
</pre>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,46 @@
<html>
<head>
<title>pcre2_set_compile_recursion_guard specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_compile_recursion_guard man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function defines, within a compile context, a function that is called
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
pattern. The first argument to the function gives the current depth of
parenthesis nesting, and the second is user data that is supplied when the
function is set up. The guard function should return zero if all is well, or
non-zero to force an error. This feature is provided so that applications can
check the available system stack space, in order to avoid running out. The
result of <b>pcre2_set_compile_recursion_guard()</b> is always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_set_depth_limit specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_depth_limit man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_depth_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the backtracking depth limit field in a match context. The
result is always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_set_glob_escape specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_glob_escape man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
<b> uint32_t <i>escape_char</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It sets the escape character that is used when converting globs. The second
argument must either be zero (meaning there is no escape character) or a
punctuation character whose code point is less than 256. The default is grave
accent if running under Windows, otherwise backslash. The result of the
function is zero for success or PCRE2_ERROR_BADDATA if the second argument is
invalid.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_set_glob_separator specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_glob_separator man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
<b> uint32_t <i>separator_char</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It sets the component separator character that is used when converting globs.
The second argument must be one of the characters forward slash, backslash, or
dot. The default is backslash when running under Windows, otherwise forward
slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
the second argument is invalid.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_set_heap_limit specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_heap_limit man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the backtracking heap limit field in a match context. The
result is always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

Some files were not shown because too many files have changed in this diff Show More