aboutsummaryrefslogtreecommitdiffstats
path: root/subversion/libsvn_diff
diff options
context:
space:
mode:
authorPeter Wemm <peter@FreeBSD.org>2013-06-18 02:07:41 +0000
committerPeter Wemm <peter@FreeBSD.org>2013-06-18 02:07:41 +0000
commit32547653cc5376642e1231fb644db99933ac8db4 (patch)
tree135691142dc0e75a5e5d97b5074d03436435b8e0 /subversion/libsvn_diff
downloadsrc-32547653cc5376642e1231fb644db99933ac8db4.tar.gz
src-32547653cc5376642e1231fb644db99933ac8db4.zip
Import trimmed svn-1.8.0-rc3 (tag: vendor/subversion/subversion-1.8.0-rc3)
Notes
Notes: svn path=/vendor/subversion/dist/; revision=251881 svn path=/vendor/subversion/subversion-1.8.0-rc3/; revision=251882; tag=vendor/subversion/subversion-1.8.0-rc3
Diffstat (limited to 'subversion/libsvn_diff')
-rw-r--r--subversion/libsvn_diff/deprecated.c289
-rw-r--r--subversion/libsvn_diff/diff.c199
-rw-r--r--subversion/libsvn_diff/diff.h217
-rw-r--r--subversion/libsvn_diff/diff3.c529
-rw-r--r--subversion/libsvn_diff/diff4.c314
-rw-r--r--subversion/libsvn_diff/diff_file.c2414
-rw-r--r--subversion/libsvn_diff/diff_memory.c1161
-rw-r--r--subversion/libsvn_diff/diff_tree.c1705
-rw-r--r--subversion/libsvn_diff/lcs.c375
-rw-r--r--subversion/libsvn_diff/parse-diff.c1373
-rw-r--r--subversion/libsvn_diff/token.c198
-rw-r--r--subversion/libsvn_diff/util.c591
12 files changed, 9365 insertions, 0 deletions
diff --git a/subversion/libsvn_diff/deprecated.c b/subversion/libsvn_diff/deprecated.c
new file mode 100644
index 000000000000..891ad5fa8fc0
--- /dev/null
+++ b/subversion/libsvn_diff/deprecated.c
@@ -0,0 +1,289 @@
+/*
+ * deprecated.c: holding file for all deprecated APIs.
+ * "we can't lose 'em, but we can shun 'em!"
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+/* ==================================================================== */
+
+
+
+/*** Includes. ***/
+
+/* We define this here to remove any further warnings about the usage of
+ deprecated functions in this file. */
+#define SVN_DEPRECATED
+
+#include "svn_diff.h"
+#include "svn_utf.h"
+
+#include "svn_private_config.h"
+
+
+
+
+/*** Code. ***/
+struct fns_wrapper_baton
+{
+ /* The caller's original baton. Every wrapper function in this file
+ receives a fns_wrapper_baton as its baton and hands OLD_BATON on to
+ the matching member of the old-style vtable stored below. */
+ void *old_baton;
+ const svn_diff_fns_t *vtable;
+};
+
+/* svn_diff_fns2_t::datasources_open implemented on top of an old-style
+ * vtable.
+ *
+ * BATON is a struct fns_wrapper_baton. Each of the DATASOURCE_LEN entries
+ * of DATASOURCES is opened, in order, through the old singular
+ * datasource_open callback; the first error is returned immediately.
+ * On success *PREFIX_LINES and *SUFFIX_LINES are both set to 0, i.e. no
+ * identical prefix or suffix is reported.
+ */
+static svn_error_t *
+datasources_open(void *baton,
+                 apr_off_t *prefix_lines,
+                 apr_off_t *suffix_lines,
+                 const svn_diff_datasource_e *datasources,
+                 apr_size_t datasource_len)
+{
+  struct fns_wrapper_baton *wrapper = baton;
+  apr_size_t idx = 0;
+
+  /* The old API can only open one datasource per call. */
+  while (idx < datasource_len)
+    {
+      SVN_ERR(wrapper->vtable->datasource_open(wrapper->old_baton,
+                                               datasources[idx]));
+      idx++;
+    }
+
+  /* The old API has no notion of matching prefixes or suffixes. */
+  *suffix_lines = 0;
+  *prefix_lines = 0;
+
+  return SVN_NO_ERROR;
+}
+
+/* svn_diff_fns2_t::datasource_close wrapper: close DATASOURCE through the
+ * old-style vtable, substituting the caller's original baton for BATON
+ * (a struct fns_wrapper_baton). Returns whatever the wrapped callback
+ * returns. */
+static svn_error_t *
+datasource_close(void *baton,
+                 svn_diff_datasource_e datasource)
+{
+  struct fns_wrapper_baton *wrapper = baton;
+  const svn_diff_fns_t *old_fns = wrapper->vtable;
+
+  return old_fns->datasource_close(wrapper->old_baton, datasource);
+}
+
+/* svn_diff_fns2_t::datasource_get_next_token wrapper: request the next
+ * token of DATASOURCE from the old-style vtable, substituting the caller's
+ * original baton for BATON (a struct fns_wrapper_baton). HASH and TOKEN
+ * are passed straight through to the wrapped callback, whose result is
+ * returned unchanged. */
+static svn_error_t *
+datasource_get_next_token(apr_uint32_t *hash,
+                          void **token,
+                          void *baton,
+                          svn_diff_datasource_e datasource)
+{
+  struct fns_wrapper_baton *wrapper = baton;
+  const svn_diff_fns_t *old_fns = wrapper->vtable;
+
+  return old_fns->datasource_get_next_token(hash, token,
+                                            wrapper->old_baton,
+                                            datasource);
+}
+
+/* svn_diff_fns2_t::token_compare wrapper: compare LTOKEN with RTOKEN using
+ * the old-style vtable, substituting the caller's original baton for BATON
+ * (a struct fns_wrapper_baton). COMPARE is passed straight through to the
+ * wrapped callback, whose result is returned unchanged. */
+static svn_error_t *
+token_compare(void *baton,
+              void *ltoken,
+              void *rtoken,
+              int *compare)
+{
+  struct fns_wrapper_baton *wrapper = baton;
+  const svn_diff_fns_t *old_fns = wrapper->vtable;
+
+  return old_fns->token_compare(wrapper->old_baton, ltoken, rtoken,
+                                compare);
+}
+
+/* svn_diff_fns2_t::token_discard wrapper: hand TOKEN to the old-style
+ * vtable's token_discard, substituting the caller's original baton for
+ * BATON (a struct fns_wrapper_baton).
+ *
+ * wrap_diff_fns() always installs this wrapper, so a caller of the new
+ * vtable can no longer see whether the old callback was provided. The
+ * old member is therefore checked here: if it is NULL this function does
+ * nothing instead of calling through a NULL pointer. */
+static void
+token_discard(void *baton,
+              void *token)
+{
+  struct fns_wrapper_baton *fwb = baton;
+
+  if (fwb->vtable->token_discard != NULL)
+    fwb->vtable->token_discard(fwb->old_baton, token);
+}
+
+/* svn_diff_fns2_t::token_discard_all wrapper: invoke the old-style
+ * vtable's token_discard_all with the caller's original baton in place of
+ * BATON (a struct fns_wrapper_baton).
+ *
+ * The diff drivers only call token_discard_all when the vtable member is
+ * non-NULL, i.e. the callback is optional. wrap_diff_fns() always
+ * installs this wrapper, which hides a NULL member of the old vtable from
+ * that check, so the check is repeated here: a missing old callback makes
+ * this function a no-op instead of a call through a NULL pointer. */
+static void
+token_discard_all(void *baton)
+{
+  struct fns_wrapper_baton *fwb = baton;
+
+  if (fwb->vtable->token_discard_all != NULL)
+    fwb->vtable->token_discard_all(fwb->old_baton);
+}
+
+
+/* Build a new-style vtable and baton around an old-style pair.
+ *
+ * Set *DIFF_FNS2 to a svn_diff_fns2_t whose members are the wrapper
+ * functions defined in this file, and *BATON2 to a wrapper baton that
+ * remembers DIFF_FNS and BATON. Both objects are allocated in
+ * RESULT_POOL. */
+static void
+wrap_diff_fns(svn_diff_fns2_t **diff_fns2,
+              struct fns_wrapper_baton **baton2,
+              const svn_diff_fns_t *diff_fns,
+              void *baton,
+              apr_pool_t *result_pool)
+{
+  svn_diff_fns2_t *new_fns;
+  struct fns_wrapper_baton *wrapper;
+
+  /* The vtable: every member points at the local wrapper of that name. */
+  new_fns = apr_palloc(result_pool, sizeof(*new_fns));
+  new_fns->datasources_open = datasources_open;
+  new_fns->datasource_close = datasource_close;
+  new_fns->datasource_get_next_token = datasource_get_next_token;
+  new_fns->token_compare = token_compare;
+  new_fns->token_discard = token_discard;
+  new_fns->token_discard_all = token_discard_all;
+  *diff_fns2 = new_fns;
+
+  /* The baton: what the wrappers need to reach the old implementation. */
+  wrapper = apr_palloc(result_pool, sizeof(*wrapper));
+  wrapper->old_baton = baton;
+  wrapper->vtable = diff_fns;
+  *baton2 = wrapper;
+}
+
+
+/*** From diff_file.c ***/
+/* Deprecated entry point: forwards all arguments unchanged to
+ * svn_diff_file_output_unified3(), supplying NULL and FALSE for the two
+ * additional arguments that function takes before POOL. */
+svn_error_t *
+svn_diff_file_output_unified2(svn_stream_t *output_stream,
+                              svn_diff_t *diff,
+                              const char *original_path,
+                              const char *modified_path,
+                              const char *original_header,
+                              const char *modified_header,
+                              const char *header_encoding,
+                              apr_pool_t *pool)
+{
+  svn_error_t *err;
+
+  err = svn_diff_file_output_unified3(output_stream, diff,
+                                      original_path, modified_path,
+                                      original_header, modified_header,
+                                      header_encoding,
+                                      NULL, FALSE,
+                                      pool);
+  return err;
+}
+
+/* Deprecated entry point: same as svn_diff_file_output_unified2() with
+ * SVN_APR_LOCALE_CHARSET as the header encoding. */
+svn_error_t *
+svn_diff_file_output_unified(svn_stream_t *output_stream,
+                             svn_diff_t *diff,
+                             const char *original_path,
+                             const char *modified_path,
+                             const char *original_header,
+                             const char *modified_header,
+                             apr_pool_t *pool)
+{
+  svn_error_t *err;
+
+  err = svn_diff_file_output_unified2(output_stream, diff,
+                                      original_path, modified_path,
+                                      original_header, modified_header,
+                                      SVN_APR_LOCALE_CHARSET,
+                                      pool);
+  return err;
+}
+
+/* Deprecated entry point: same as svn_diff_file_diff_2() with a freshly
+ * created options object (svn_diff_file_options_create(), allocated in
+ * POOL). */
+svn_error_t *
+svn_diff_file_diff(svn_diff_t **diff,
+                   const char *original,
+                   const char *modified,
+                   apr_pool_t *pool)
+{
+  svn_diff_file_options_t *default_options
+    = svn_diff_file_options_create(pool);
+
+  return svn_diff_file_diff_2(diff, original, modified,
+                              default_options, pool);
+}
+
+/* Deprecated entry point: same as svn_diff_file_diff3_2() with a freshly
+ * created options object (svn_diff_file_options_create(), allocated in
+ * POOL). */
+svn_error_t *
+svn_diff_file_diff3(svn_diff_t **diff,
+                    const char *original,
+                    const char *modified,
+                    const char *latest,
+                    apr_pool_t *pool)
+{
+  svn_diff_file_options_t *default_options
+    = svn_diff_file_options_create(pool);
+
+  return svn_diff_file_diff3_2(diff, original, modified, latest,
+                               default_options, pool);
+}
+
+/* Deprecated entry point: same as svn_diff_file_diff4_2() with a freshly
+ * created options object (svn_diff_file_options_create(), allocated in
+ * POOL). */
+svn_error_t *
+svn_diff_file_diff4(svn_diff_t **diff,
+                    const char *original,
+                    const char *modified,
+                    const char *latest,
+                    const char *ancestor,
+                    apr_pool_t *pool)
+{
+  svn_diff_file_options_t *default_options
+    = svn_diff_file_options_create(pool);
+
+  return svn_diff_file_diff4_2(diff, original, modified, latest, ancestor,
+                               default_options, pool);
+}
+
+/* Deprecated entry point: translate the two boolean display flags into a
+ * svn_diff_conflict_display_style_t and call svn_diff_file_output_merge2().
+ *
+ * DISPLAY_ORIGINAL_IN_CONFLICT takes precedence over
+ * DISPLAY_RESOLVED_CONFLICTS; with neither set the style is
+ * svn_diff_conflict_display_modified_latest. All other arguments are
+ * forwarded unchanged. */
+svn_error_t *
+svn_diff_file_output_merge(svn_stream_t *output_stream,
+                           svn_diff_t *diff,
+                           const char *original_path,
+                           const char *modified_path,
+                           const char *latest_path,
+                           const char *conflict_original,
+                           const char *conflict_modified,
+                           const char *conflict_latest,
+                           const char *conflict_separator,
+                           svn_boolean_t display_original_in_conflict,
+                           svn_boolean_t display_resolved_conflicts,
+                           apr_pool_t *pool)
+{
+  svn_diff_conflict_display_style_t style;
+
+  if (display_original_in_conflict)
+    style = svn_diff_conflict_display_modified_original_latest;
+  else if (display_resolved_conflicts)
+    style = svn_diff_conflict_display_resolved_modified_latest;
+  else
+    style = svn_diff_conflict_display_modified_latest;
+
+  return svn_diff_file_output_merge2(output_stream, diff,
+                                     original_path, modified_path,
+                                     latest_path,
+                                     conflict_original, conflict_modified,
+                                     conflict_latest, conflict_separator,
+                                     style, pool);
+}
+
+
+/*** From diff.c ***/
+/* Deprecated entry point: wrap the old-style VTABLE and DIFF_BATON in a
+ * svn_diff_fns2_t / wrapper baton pair (allocated in POOL) and run
+ * svn_diff_diff_2() with them. */
+svn_error_t *
+svn_diff_diff(svn_diff_t **diff,
+              void *diff_baton,
+              const svn_diff_fns_t *vtable,
+              apr_pool_t *pool)
+{
+  struct fns_wrapper_baton *wrapper_baton;
+  svn_diff_fns2_t *wrapped_fns;
+
+  wrap_diff_fns(&wrapped_fns, &wrapper_baton, vtable, diff_baton, pool);
+
+  return svn_diff_diff_2(diff, wrapper_baton, wrapped_fns, pool);
+}
+
+
+/*** From diff3.c ***/
+/* Deprecated entry point: wrap the old-style VTABLE and DIFF_BATON in a
+ * svn_diff_fns2_t / wrapper baton pair (allocated in POOL) and run
+ * svn_diff_diff3_2() with them. */
+svn_error_t *
+svn_diff_diff3(svn_diff_t **diff,
+               void *diff_baton,
+               const svn_diff_fns_t *vtable,
+               apr_pool_t *pool)
+{
+  struct fns_wrapper_baton *wrapper_baton;
+  svn_diff_fns2_t *wrapped_fns;
+
+  wrap_diff_fns(&wrapped_fns, &wrapper_baton, vtable, diff_baton, pool);
+
+  return svn_diff_diff3_2(diff, wrapper_baton, wrapped_fns, pool);
+}
+
+
+/*** From diff4.c ***/
+/* Deprecated entry point: wrap the old-style VTABLE and DIFF_BATON in a
+ * svn_diff_fns2_t / wrapper baton pair (allocated in POOL) and run
+ * svn_diff_diff4_2() with them. */
+svn_error_t *
+svn_diff_diff4(svn_diff_t **diff,
+               void *diff_baton,
+               const svn_diff_fns_t *vtable,
+               apr_pool_t *pool)
+{
+  struct fns_wrapper_baton *wrapper_baton;
+  svn_diff_fns2_t *wrapped_fns;
+
+  wrap_diff_fns(&wrapped_fns, &wrapper_baton, vtable, diff_baton, pool);
+
+  return svn_diff_diff4_2(diff, wrapper_baton, wrapped_fns, pool);
+}
diff --git a/subversion/libsvn_diff/diff.c b/subversion/libsvn_diff/diff.c
new file mode 100644
index 000000000000..f43a3be405e2
--- /dev/null
+++ b/subversion/libsvn_diff/diff.c
@@ -0,0 +1,199 @@
+/*
+ * diff.c : routines for doing diffs
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#include <apr.h>
+#include <apr_pools.h>
+#include <apr_general.h>
+
+#include "svn_pools.h"
+#include "svn_error.h"
+#include "svn_diff.h"
+#include "svn_types.h"
+
+#include "diff.h"
+
+
+/* Return an array of NUM_TOKENS counters, allocated in POOL, in which
+ * element I holds the number of positions in the circular list
+ * LOOP_START whose token_index equals I. A NULL LOOP_START yields an
+ * all-zero array. */
+svn_diff__token_index_t*
+svn_diff__get_token_counts(svn_diff__position_t *loop_start,
+                           svn_diff__token_index_t num_tokens,
+                           apr_pool_t *pool)
+{
+  svn_diff__token_index_t *counts
+    = apr_palloc(pool, num_tokens * sizeof(*counts));
+  svn_diff__token_index_t idx = 0;
+  svn_diff__position_t *pos;
+
+  /* apr_palloc() memory is not cleared; start every counter at zero. */
+  while (idx < num_tokens)
+    counts[idx++] = 0;
+
+  if (loop_start == NULL)
+    return counts;
+
+  /* Walk the ring exactly once, stopping when we are back at the start. */
+  pos = loop_start;
+  do
+    {
+      counts[pos->token_index]++;
+      pos = pos->next;
+    }
+  while (pos != loop_start);
+
+  return counts;
+}
+
+
+/* Turn the LCS chain LCS into a NULL-terminated list of diff hunks
+ * allocated in POOL, and return its head (NULL if no hunk is produced).
+ *
+ * ORIGINAL_START and MODIFIED_START are the offsets at which the walk
+ * begins in each source. Whenever either running offset lies before the
+ * positions of the current LCS element, the gap is emitted as a
+ * svn_diff__type_diff_modified hunk. If WANT_COMMON is true, each LCS
+ * element with a non-zero length is additionally emitted as a
+ * svn_diff__type_common hunk. The walk stops at the first element whose
+ * length is 0 (the EOF marker), so LCS must end with such an element.
+ *
+ * Hunk start fields are stored as (offset - 1); latest_start and
+ * latest_length are always 0 and resolved_diff is always NULL.
+ */
+svn_diff_t *
+svn_diff__diff(svn_diff__lcs_t *lcs,
+               apr_off_t original_start, apr_off_t modified_start,
+               svn_boolean_t want_common,
+               apr_pool_t *pool)
+{
+  svn_diff_t *diff;
+  svn_diff_t **diff_ref = &diff;
+
+  while (1)
+    {
+      if (original_start < lcs->position[0]->offset
+          || modified_start < lcs->position[1]->offset)
+        {
+          (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+          (*diff_ref)->type = svn_diff__type_diff_modified;
+          (*diff_ref)->original_start = original_start - 1;
+          (*diff_ref)->original_length =
+            lcs->position[0]->offset - original_start;
+          (*diff_ref)->modified_start = modified_start - 1;
+          (*diff_ref)->modified_length =
+            lcs->position[1]->offset - modified_start;
+          (*diff_ref)->latest_start = 0;
+          (*diff_ref)->latest_length = 0;
+          /* apr_palloc() does not clear memory: never hand out a hunk
+             with a garbage resolved_diff pointer. */
+          (*diff_ref)->resolved_diff = NULL;
+
+          diff_ref = &(*diff_ref)->next;
+        }
+
+      /* Detect the EOF */
+      if (lcs->length == 0)
+        break;
+
+      original_start = lcs->position[0]->offset;
+      modified_start = lcs->position[1]->offset;
+
+      if (want_common)
+        {
+          (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+          (*diff_ref)->type = svn_diff__type_common;
+          (*diff_ref)->original_start = original_start - 1;
+          (*diff_ref)->original_length = lcs->length;
+          (*diff_ref)->modified_start = modified_start - 1;
+          (*diff_ref)->modified_length = lcs->length;
+          (*diff_ref)->latest_start = 0;
+          (*diff_ref)->latest_length = 0;
+          /* See above: keep the pointer well-defined. */
+          (*diff_ref)->resolved_diff = NULL;
+
+          diff_ref = &(*diff_ref)->next;
+        }
+
+      /* Continue right after the common run we just consumed. */
+      original_start += lcs->length;
+      modified_start += lcs->length;
+
+      lcs = lcs->next;
+    }
+
+  /* Terminate the list (this also sets DIFF to NULL if it is empty). */
+  *diff_ref = NULL;
+
+  return diff;
+}
+
+
+/* Compute the two-way diff between the "original" and "modified"
+ * datasources reachable through VTABLE / DIFF_BATON and return it in
+ * *DIFF, allocated in POOL.
+ *
+ * *DIFF is set to NULL first, so it is NULL whenever an error is
+ * returned. Scratch data lives in two subpools of POOL which are
+ * destroyed before a successful return.
+ */
+svn_error_t *
+svn_diff_diff_2(svn_diff_t **diff,
+                void *diff_baton,
+                const svn_diff_fns2_t *vtable,
+                apr_pool_t *pool)
+{
+  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
+                                        svn_diff_datasource_modified};
+  svn_diff__position_t *position_list[2];
+  svn_diff__token_index_t *token_counts[2];
+  svn_diff__token_index_t num_tokens;
+  svn_diff__tree_t *tree;
+  svn_diff__lcs_t *lcs;
+  apr_off_t prefix_lines = 0;
+  apr_off_t suffix_lines = 0;
+  apr_pool_t *subpool;
+  apr_pool_t *treepool;
+  int i;
+
+  *diff = NULL;
+
+  subpool = svn_pool_create(pool);
+  treepool = svn_pool_create(pool);
+
+  svn_diff__tree_create(&tree, treepool);
+
+  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
+                                   datasource, 2));
+
+  /* Insert the data into the tree: original first, then modified. */
+  for (i = 0; i < 2; i++)
+    {
+      SVN_ERR(svn_diff__get_tokens(&position_list[i], tree,
+                                   diff_baton, vtable,
+                                   datasource[i],
+                                   prefix_lines,
+                                   subpool));
+    }
+
+  num_tokens = svn_diff__get_node_count(tree);
+
+  /* From here on only token indices are used, not the tokens themselves,
+   * so let the application release them if it provided a callback.
+   */
+  if (vtable->token_discard_all != NULL)
+    vtable->token_discard_all(diff_baton);
+
+  /* Neither the tree nodes nor the tree itself are needed any longer. */
+  svn_pool_destroy(treepool);
+
+  for (i = 0; i < 2; i++)
+    {
+      token_counts[i] = svn_diff__get_token_counts(position_list[i],
+                                                   num_tokens, subpool);
+    }
+
+  /* Get the lcs */
+  lcs = svn_diff__lcs(position_list[0], position_list[1],
+                      token_counts[0], token_counts[1],
+                      num_tokens, prefix_lines, suffix_lines, subpool);
+
+  /* Produce the diff; it is allocated in POOL so it outlives SUBPOOL. */
+  *diff = svn_diff__diff(lcs, 1, 1, TRUE, pool);
+
+  /* Get rid of all the data we don't have a use for anymore */
+  svn_pool_destroy(subpool);
+
+  return SVN_NO_ERROR;
+}
diff --git a/subversion/libsvn_diff/diff.h b/subversion/libsvn_diff/diff.h
new file mode 100644
index 000000000000..51a84c640580
--- /dev/null
+++ b/subversion/libsvn_diff/diff.h
@@ -0,0 +1,217 @@
+/*
+ * diff.h : private header file
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#if !defined(DIFF_H)
+#define DIFF_H
+
+#include <apr.h>
+#include <apr_pools.h>
+#include <apr_general.h>
+
+#include "svn_diff.h"
+#include "svn_types.h"
+
+#define SVN_DIFF__UNIFIED_CONTEXT_SIZE 3
+
+typedef struct svn_diff__node_t svn_diff__node_t;
+typedef struct svn_diff__tree_t svn_diff__tree_t;
+typedef struct svn_diff__position_t svn_diff__position_t;
+typedef struct svn_diff__lcs_t svn_diff__lcs_t;
+
+typedef enum svn_diff__type_e
+{
+ svn_diff__type_common, /* range covered by an LCS element (see svn_diff__diff) */
+ svn_diff__type_diff_modified, /* gap between LCS elements: original and modified differ */
+ svn_diff__type_diff_latest, /* NOTE(review): presumably only latest differs - confirm in diff3.c */
+ svn_diff__type_diff_common, /* modified and latest hold the same tokens (see svn_diff__resolve_conflict) */
+ svn_diff__type_conflict /* modified and latest ranges differ (see svn_diff__resolve_conflict) */
+} svn_diff__type_e;
+
+struct svn_diff_t { /* one hunk in a singly linked list of hunks */
+ svn_diff_t *next; /* next hunk; NULL terminates the list */
+ svn_diff__type_e type; /* what kind of range this hunk describes */
+ apr_off_t original_start; /* stored as (position offset - 1) by the producers */
+ apr_off_t original_length;
+ apr_off_t modified_start; /* stored as (position offset - 1) by the producers */
+ apr_off_t modified_length;
+ apr_off_t latest_start; /* 0 in hunks built by svn_diff__diff() */
+ apr_off_t latest_length; /* 0 in hunks built by svn_diff__diff() */
+ svn_diff_t *resolved_diff; /* sub-list filled in by svn_diff__resolve_conflict(); may be NULL */
+};
+
+/* Type used for token indices and counts of tokens. Must be signed. */
+typedef long int svn_diff__token_index_t;
+
+struct svn_diff__position_t /* one token occurrence in a datasource */
+{
+ svn_diff__position_t *next; /* next position; the lists are used as rings */
+ svn_diff__token_index_t token_index; /* index into the token count arrays */
+ apr_off_t offset; /* position within the datasource; callers start counting at 1 */
+};
+
+struct svn_diff__lcs_t /* one run of tokens common to two position lists */
+{
+ svn_diff__lcs_t *next; /* next run in the chain */
+ svn_diff__position_t *position[2]; /* where the run starts in each of the two lists */
+ apr_off_t length; /* number of tokens in the run; 0 marks the EOF element */
+ int refcount; /* NOTE(review): presumably used by the LCS implementation (lcs.c) - confirm */
+};
+
+
+/* State used when normalizing whitespace and EOL styles. */
+typedef enum svn_diff__normalize_state_t
+{
+ /* Initial state; not in a sequence of whitespace. */
+ svn_diff__normalize_state_normal,
+ /* We're in a sequence of whitespace characters. Only entered if
+ we ignore whitespace. */
+ svn_diff__normalize_state_whitespace,
+ /* The previous character was CR. */
+ svn_diff__normalize_state_cr
+} svn_diff__normalize_state_t;
+
+
+/*
+ * Calculate the Longest Common Subsequence (LCS) between two datasources
+ * POSITION_LIST1 and POSITION_LIST2, with TOKEN_COUNTS_LIST1 and
+ * TOKEN_COUNTS_LIST2 the corresponding counts of the different tokens
+ * (indexed by the 'token_index' of the positions of each position_list).
+ *
+ * From the beginning of each list, PREFIX_LINES lines will be assumed to be
+ * equal and be excluded from the comparison process. Similarly, SUFFIX_LINES
+ * at the end of both sequences will be skipped.
+ *
+ * The resulting lcs structure will be the return value of this function.
+ * Allocations will be made from POOL.
+ */
+svn_diff__lcs_t *
+svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
+ svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+ svn_diff__token_index_t *token_counts_list1, /* array of counts */
+ svn_diff__token_index_t *token_counts_list2, /* array of counts */
+ svn_diff__token_index_t num_tokens, /* length of count arrays */
+ apr_off_t prefix_lines,
+ apr_off_t suffix_lines,
+ apr_pool_t *pool);
+
+
+/*
+ * Returns number of tokens in a tree
+ */
+svn_diff__token_index_t
+svn_diff__get_node_count(svn_diff__tree_t *tree);
+
+/*
+ * Support functions to build a tree of token positions
+ */
+void
+svn_diff__tree_create(svn_diff__tree_t **tree, apr_pool_t *pool);
+
+
+/*
+ * Get all tokens from a datasource. Return the
+ * last item in the (circular) list.
+ */
+svn_error_t *
+svn_diff__get_tokens(svn_diff__position_t **position_list,
+ svn_diff__tree_t *tree,
+ void *diff_baton,
+ const svn_diff_fns2_t *vtable,
+ svn_diff_datasource_e datasource,
+ apr_off_t prefix_lines,
+ apr_pool_t *pool);
+
+/*
+ * Returns an array with the counts for the tokens in
+ * the looped linked list given in loop_start.
+ * num_tokens equals the highest possible token index +1.
+ */
+svn_diff__token_index_t*
+svn_diff__get_token_counts(svn_diff__position_t *loop_start,
+ svn_diff__token_index_t num_tokens,
+ apr_pool_t *pool);
+
+/* Morph a svn_diff__lcs_t into a svn_diff_t. */
+svn_diff_t *
+svn_diff__diff(svn_diff__lcs_t *lcs,
+ apr_off_t original_start, apr_off_t modified_start,
+ svn_boolean_t want_common,
+ apr_pool_t *pool);
+
+/* Classify HUNK by comparing the tokens of its modified range (found via
+ * *POSITION_LIST1) with those of its latest range (found via
+ * *POSITION_LIST2), and advance both list pointers past the hunk.
+ *
+ * If both ranges hold the same token sequence, HUNK becomes
+ * svn_diff__type_diff_common and HUNK->resolved_diff is set to NULL.
+ * Otherwise HUNK becomes svn_diff__type_conflict and HUNK->resolved_diff
+ * is set to a list, allocated in POOL, of conflict and diff_common pieces.
+ * NUM_TOKENS is the length to use for the token count arrays.
+ */
+void
+svn_diff__resolve_conflict(svn_diff_t *hunk,
+ svn_diff__position_t **position_list1,
+ svn_diff__position_t **position_list2,
+ svn_diff__token_index_t num_tokens,
+ apr_pool_t *pool);
+
+
+/* Normalize the characters pointed to by the buffer BUF (of length *LENGTHP)
+ * according to the options *OPTS, starting in the state *STATEP.
+ *
+ * Adjust *LENGTHP and *STATEP to be the length of the normalized buffer and
+ * the final state, respectively.
+ * Normalized data is written to the memory at *TGT. BUF and TGT may point
+ * to the same memory area. The memory area pointed to by *TGT should be
+ * large enough to hold *LENGTHP bytes.
+ * When on return *TGT is not equal to the value passed in, it points somewhere
+ * into the memory region designated by BUF and *LENGTHP.
+ */
+void
+svn_diff__normalize_buffer(char **tgt,
+ apr_off_t *lengthp,
+ svn_diff__normalize_state_t *statep,
+ const char *buf,
+ const svn_diff_file_options_t *opts);
+
+/* Append to STRINGBUF a newline followed by a "\ No newline at end of file" line.
+ *
+ * The text will be encoded into HEADER_ENCODING.
+ */
+svn_error_t *
+svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf,
+ const char *header_encoding,
+ apr_pool_t *scratch_pool);
+
+/* Write a unidiff hunk header to OUTPUT_STREAM.
+ *
+ * The header will use HUNK_DELIMITER (which should usually be "@@") before
+ * and after the line-number ranges which are formed from OLD_START,
+ * OLD_LENGTH, NEW_START and NEW_LENGTH. If HUNK_EXTRA_CONTEXT is not NULL,
+ * it will be written after the final delimiter, with an intervening space.
+ *
+ * The text will be encoded into HEADER_ENCODING.
+ */
+svn_error_t *
+svn_diff__unified_write_hunk_header(svn_stream_t *output_stream,
+ const char *header_encoding,
+ const char *hunk_delimiter,
+ apr_off_t old_start,
+ apr_off_t old_length,
+ apr_off_t new_start,
+ apr_off_t new_length,
+ const char *hunk_extra_context,
+ apr_pool_t *scratch_pool);
+
+
+#endif /* DIFF_H */
diff --git a/subversion/libsvn_diff/diff3.c b/subversion/libsvn_diff/diff3.c
new file mode 100644
index 000000000000..8b7c9b332817
--- /dev/null
+++ b/subversion/libsvn_diff/diff3.c
@@ -0,0 +1,529 @@
+/*
+ * diff3.c : routines for doing diffs
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#include <apr.h>
+#include <apr_pools.h>
+#include <apr_general.h>
+
+#include "svn_pools.h"
+#include "svn_error.h"
+#include "svn_diff.h"
+#include "svn_types.h"
+
+#include "diff.h"
+
+
+/* Classify HUNK, whose modified and latest ranges both differ from the
+ * original, and build HUNK->resolved_diff for it.
+ *
+ * *POSITION_LIST1 and *POSITION_LIST2 point into the modified and latest
+ * position lists respectively; on return both have been advanced past the
+ * positions covered by HUNK.
+ *
+ * If the two ranges hold the same token sequence, HUNK becomes
+ * svn_diff__type_diff_common with a NULL resolved_diff. Otherwise HUNK
+ * becomes svn_diff__type_conflict and resolved_diff receives a list
+ * (allocated in POOL) of conflict and diff_common pieces, derived from an
+ * LCS of the two ranges. NUM_TOKENS is the length of the token count
+ * arrays handed to svn_diff__get_token_counts() and svn_diff__lcs().
+ *
+ * Side effect: in the conflict case the positions of each non-empty range
+ * are relinked in place into a ring of their own.
+ */
+void
+svn_diff__resolve_conflict(svn_diff_t *hunk,
+ svn_diff__position_t **position_list1,
+ svn_diff__position_t **position_list2,
+ svn_diff__token_index_t num_tokens,
+ apr_pool_t *pool)
+{
+ /* Hunk starts are stored one less than the position offsets they are
+ compared against below, hence the + 1. */
+ apr_off_t modified_start = hunk->modified_start + 1;
+ apr_off_t latest_start = hunk->latest_start + 1;
+ apr_off_t common_length;
+ apr_off_t modified_length = hunk->modified_length;
+ apr_off_t latest_length = hunk->latest_length;
+ svn_diff__position_t *start_position[2];
+ svn_diff__position_t *position[2];
+ svn_diff__token_index_t *token_counts[2];
+ svn_diff__lcs_t *lcs = NULL;
+ svn_diff__lcs_t **lcs_ref = &lcs;
+ svn_diff_t **diff_ref = &hunk->resolved_diff;
+ apr_pool_t *subpool;
+
+ /* First find the starting positions for the
+ * comparison
+ */
+
+ start_position[0] = *position_list1;
+ start_position[1] = *position_list2;
+
+ while (start_position[0]->offset < modified_start)
+ start_position[0] = start_position[0]->next;
+
+ while (start_position[1]->offset < latest_start)
+ start_position[1] = start_position[1]->next;
+
+ position[0] = start_position[0];
+ position[1] = start_position[1];
+
+ /* Walk both ranges in lock-step while their tokens match; afterwards
+ * common_length holds how many of the min(modified, latest) tokens
+ * were NOT matched.
+ */
+ common_length = modified_length < latest_length
+ ? modified_length : latest_length;
+
+ while (common_length > 0
+ && position[0]->token_index == position[1]->token_index)
+ {
+ position[0] = position[0]->next;
+ position[1] = position[1]->next;
+
+ common_length--;
+ }
+
+ /* Equal lengths and every token matched: both sides made the same
+ * change, so there is nothing to resolve.
+ */
+ if (common_length == 0
+ && modified_length == latest_length)
+ {
+ hunk->type = svn_diff__type_diff_common;
+ hunk->resolved_diff = NULL;
+
+ *position_list1 = position[0];
+ *position_list2 = position[1];
+
+ return;
+ }
+
+ hunk->type = svn_diff__type_conflict;
+
+ /* ### If we have a conflict we can try to find the
+ * ### common parts in it by getting an lcs between
+ * ### modified (start to start + length) and
+ * ### latest (start to start + length).
+ * ### We use this lcs to create a simple diff. Only
+ * ### where there is a diff between the two, we have
+ * ### a conflict.
+ * ### This raises a problem; several common diffs and
+ * ### conflicts can occur within the same original
+ * ### block. This needs some thought.
+ * ###
+ * ### NB: We can use the node _pointers_ to identify
+ * ### different tokens
+ */
+
+ /* Scratch pool for the lcs and token counts; the resolved diff itself
+ * is allocated in POOL.
+ */
+ subpool = svn_pool_create(pool);
+
+ /* Calculate how much of the two sequences was
+ * actually the same.
+ */
+ common_length = (modified_length < latest_length
+ ? modified_length : latest_length)
+ - common_length;
+
+ /* If there were matching symbols at the start of
+ * both sequences, record that fact.
+ */
+ if (common_length > 0)
+ {
+ lcs = apr_palloc(subpool, sizeof(*lcs));
+ lcs->next = NULL;
+ lcs->position[0] = start_position[0];
+ lcs->position[1] = start_position[1];
+ lcs->length = common_length;
+
+ lcs_ref = &lcs->next;
+ }
+
+ /* From here on the lengths describe only the part after the common
+ * prefix, while the start offsets still include it.
+ */
+ modified_length -= common_length;
+ latest_length -= common_length;
+
+ modified_start = start_position[0]->offset;
+ latest_start = start_position[1]->offset;
+
+ start_position[0] = position[0];
+ start_position[1] = position[1];
+
+ /* Create a new ring for svn_diff__lcs to grok.
+ * We can safely do this given we don't need the
+ * positions we processed anymore.
+ */
+ if (modified_length == 0)
+ {
+ *position_list1 = position[0];
+ position[0] = NULL;
+ }
+ else
+ {
+ /* modified_length is non-zero here, so this takes
+ * modified_length - 1 steps to the last position of the range.
+ */
+ while (--modified_length)
+ position[0] = position[0]->next;
+
+ *position_list1 = position[0]->next;
+ position[0]->next = start_position[0];
+ }
+
+ if (latest_length == 0)
+ {
+ *position_list2 = position[1];
+ position[1] = NULL;
+ }
+ else
+ {
+ /* Same as above, for the latest range. */
+ while (--latest_length)
+ position[1] = position[1]->next;
+
+ *position_list2 = position[1]->next;
+ position[1]->next = start_position[1];
+ }
+
+ token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens,
+ subpool);
+ token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens,
+ subpool);
+
+ /* Append the lcs of the remaining ranges after the common-prefix
+ * element, if there was one; otherwise it becomes the whole chain.
+ */
+ *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0],
+ token_counts[1], num_tokens, 0, 0, subpool);
+
+ /* Fix up the EOF lcs element in case one of
+ * the two sequences was NULL.
+ * NOTE(review): an offset of 1 presumably identifies the placeholder
+ * position svn_diff__lcs uses for an empty sequence - confirm in lcs.c.
+ */
+ if ((*lcs_ref)->position[0]->offset == 1)
+ (*lcs_ref)->position[0] = *position_list1;
+
+ if ((*lcs_ref)->position[1]->offset == 1)
+ (*lcs_ref)->position[1] = *position_list2;
+
+ /* Produce the resolved diff */
+ while (1)
+ {
+ /* A gap before the next lcs element is a real conflict. */
+ if (modified_start < lcs->position[0]->offset
+ || latest_start < lcs->position[1]->offset)
+ {
+ (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+ (*diff_ref)->type = svn_diff__type_conflict;
+ (*diff_ref)->original_start = hunk->original_start;
+ (*diff_ref)->original_length = hunk->original_length;
+ (*diff_ref)->modified_start = modified_start - 1;
+ (*diff_ref)->modified_length = lcs->position[0]->offset
+ - modified_start;
+ (*diff_ref)->latest_start = latest_start - 1;
+ (*diff_ref)->latest_length = lcs->position[1]->offset
+ - latest_start;
+ (*diff_ref)->resolved_diff = NULL;
+
+ diff_ref = &(*diff_ref)->next;
+ }
+
+ /* Detect the EOF */
+ if (lcs->length == 0)
+ break;
+
+ modified_start = lcs->position[0]->offset;
+ latest_start = lcs->position[1]->offset;
+
+ /* The lcs element itself: modified and latest agree here. */
+ (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+ (*diff_ref)->type = svn_diff__type_diff_common;
+ (*diff_ref)->original_start = hunk->original_start;
+ (*diff_ref)->original_length = hunk->original_length;
+ (*diff_ref)->modified_start = modified_start - 1;
+ (*diff_ref)->modified_length = lcs->length;
+ (*diff_ref)->latest_start = latest_start - 1;
+ (*diff_ref)->latest_length = lcs->length;
+ (*diff_ref)->resolved_diff = NULL;
+
+ diff_ref = &(*diff_ref)->next;
+
+ modified_start += lcs->length;
+ latest_start += lcs->length;
+
+ lcs = lcs->next;
+ }
+
+ /* Terminate the resolved diff list. */
+ *diff_ref = NULL;
+
+ svn_pool_destroy(subpool);
+}
+
+
+svn_error_t *
+svn_diff_diff3_2(svn_diff_t **diff,
+ void *diff_baton,
+ const svn_diff_fns2_t *vtable,
+ apr_pool_t *pool)
+{
+ svn_diff__tree_t *tree;
+ svn_diff__position_t *position_list[3];
+ svn_diff__token_index_t num_tokens;
+ svn_diff__token_index_t *token_counts[3];
+ svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
+ svn_diff_datasource_modified,
+ svn_diff_datasource_latest};
+ svn_diff__lcs_t *lcs_om;
+ svn_diff__lcs_t *lcs_ol;
+ apr_pool_t *subpool;
+ apr_pool_t *treepool;
+ apr_off_t prefix_lines = 0;
+ apr_off_t suffix_lines = 0;
+
+ *diff = NULL;
+
+ subpool = svn_pool_create(pool);
+ treepool = svn_pool_create(pool);
+
+ svn_diff__tree_create(&tree, treepool);
+
+ SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
+ datasource, 3));
+
+ SVN_ERR(svn_diff__get_tokens(&position_list[0],
+ tree,
+ diff_baton, vtable,
+ svn_diff_datasource_original,
+ prefix_lines,
+ subpool));
+
+ SVN_ERR(svn_diff__get_tokens(&position_list[1],
+ tree,
+ diff_baton, vtable,
+ svn_diff_datasource_modified,
+ prefix_lines,
+ subpool));
+
+ SVN_ERR(svn_diff__get_tokens(&position_list[2],
+ tree,
+ diff_baton, vtable,
+ svn_diff_datasource_latest,
+ prefix_lines,
+ subpool));
+
+ num_tokens = svn_diff__get_node_count(tree);
+
+ /* Get rid of the tokens, we don't need them to calc the diff */
+ if (vtable->token_discard_all != NULL)
+ vtable->token_discard_all(diff_baton);
+
+ /* We don't need the nodes in the tree either anymore, nor the tree itself */
+ svn_pool_destroy(treepool);
+
+ token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
+ subpool);
+ token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
+ subpool);
+ token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
+ subpool);
+
+ /* Get the lcs for original-modified and original-latest */
+ lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0],
+ token_counts[1], num_tokens, prefix_lines,
+ suffix_lines, subpool);
+ lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0],
+ token_counts[2], num_tokens, prefix_lines,
+ suffix_lines, subpool);
+
+ /* Produce a merged diff */
+ {
+ svn_diff_t **diff_ref = diff;
+
+ apr_off_t original_start = 1;
+ apr_off_t modified_start = 1;
+ apr_off_t latest_start = 1;
+ apr_off_t original_sync;
+ apr_off_t modified_sync;
+ apr_off_t latest_sync;
+ apr_off_t common_length;
+ apr_off_t modified_length;
+ apr_off_t latest_length;
+ svn_boolean_t is_modified;
+ svn_boolean_t is_latest;
+ svn_diff__position_t sentinel_position[2];
+
+ /* Point the position lists to the start of the list
+ * so that common_diff/conflict detection actually is
+ * able to work.
+ */
+ if (position_list[1])
+ {
+ sentinel_position[0].next = position_list[1]->next;
+ sentinel_position[0].offset = position_list[1]->offset + 1;
+ position_list[1]->next = &sentinel_position[0];
+ position_list[1] = sentinel_position[0].next;
+ }
+ else
+ {
+ sentinel_position[0].offset = prefix_lines + 1;
+ sentinel_position[0].next = NULL;
+ position_list[1] = &sentinel_position[0];
+ }
+
+ if (position_list[2])
+ {
+ sentinel_position[1].next = position_list[2]->next;
+ sentinel_position[1].offset = position_list[2]->offset + 1;
+ position_list[2]->next = &sentinel_position[1];
+ position_list[2] = sentinel_position[1].next;
+ }
+ else
+ {
+ sentinel_position[1].offset = prefix_lines + 1;
+ sentinel_position[1].next = NULL;
+ position_list[2] = &sentinel_position[1];
+ }
+
+ while (1)
+ {
+ /* Find the sync points */
+ while (1)
+ {
+ if (lcs_om->position[0]->offset > lcs_ol->position[0]->offset)
+ {
+ original_sync = lcs_om->position[0]->offset;
+
+ while (lcs_ol->position[0]->offset + lcs_ol->length
+ < original_sync)
+ lcs_ol = lcs_ol->next;
+
+ /* If the sync point is the EOF, and our current lcs segment
+ * doesn't reach as far as EOF, we need to skip this segment.
+ */
+ if (lcs_om->length == 0 && lcs_ol->length > 0
+ && lcs_ol->position[0]->offset + lcs_ol->length
+ == original_sync
+ && lcs_ol->position[1]->offset + lcs_ol->length
+ != lcs_ol->next->position[1]->offset)
+ lcs_ol = lcs_ol->next;
+
+ if (lcs_ol->position[0]->offset <= original_sync)
+ break;
+ }
+ else
+ {
+ original_sync = lcs_ol->position[0]->offset;
+
+ while (lcs_om->position[0]->offset + lcs_om->length
+ < original_sync)
+ lcs_om = lcs_om->next;
+
+ /* If the sync point is the EOF, and our current lcs segment
+ * doesn't reach as far as EOF, we need to skip this segment.
+ */
+ if (lcs_ol->length == 0 && lcs_om->length > 0
+ && lcs_om->position[0]->offset + lcs_om->length
+ == original_sync
+ && lcs_om->position[1]->offset + lcs_om->length
+ != lcs_om->next->position[1]->offset)
+ lcs_om = lcs_om->next;
+
+ if (lcs_om->position[0]->offset <= original_sync)
+ break;
+ }
+ }
+
+ modified_sync = lcs_om->position[1]->offset
+ + (original_sync - lcs_om->position[0]->offset);
+ latest_sync = lcs_ol->position[1]->offset
+ + (original_sync - lcs_ol->position[0]->offset);
+
+ /* Determine what is modified, if anything */
+ is_modified = lcs_om->position[0]->offset - original_start > 0
+ || lcs_om->position[1]->offset - modified_start > 0;
+
+ is_latest = lcs_ol->position[0]->offset - original_start > 0
+ || lcs_ol->position[1]->offset - latest_start > 0;
+
+ if (is_modified || is_latest)
+ {
+ modified_length = modified_sync - modified_start;
+ latest_length = latest_sync - latest_start;
+
+ (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+ (*diff_ref)->original_start = original_start - 1;
+ (*diff_ref)->original_length = original_sync - original_start;
+ (*diff_ref)->modified_start = modified_start - 1;
+ (*diff_ref)->modified_length = modified_length;
+ (*diff_ref)->latest_start = latest_start - 1;
+ (*diff_ref)->latest_length = latest_length;
+ (*diff_ref)->resolved_diff = NULL;
+
+ if (is_modified && is_latest)
+ {
+ svn_diff__resolve_conflict(*diff_ref,
+ &position_list[1],
+ &position_list[2],
+ num_tokens,
+ pool);
+ }
+ else if (is_modified)
+ {
+ (*diff_ref)->type = svn_diff__type_diff_modified;
+ }
+ else
+ {
+ (*diff_ref)->type = svn_diff__type_diff_latest;
+ }
+
+ diff_ref = &(*diff_ref)->next;
+ }
+
+ /* Detect EOF */
+ if (lcs_om->length == 0 || lcs_ol->length == 0)
+ break;
+
+ modified_length = lcs_om->length
+ - (original_sync - lcs_om->position[0]->offset);
+ latest_length = lcs_ol->length
+ - (original_sync - lcs_ol->position[0]->offset);
+ common_length = modified_length < latest_length
+ ? modified_length : latest_length;
+
+ (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+ (*diff_ref)->type = svn_diff__type_common;
+ (*diff_ref)->original_start = original_sync - 1;
+ (*diff_ref)->original_length = common_length;
+ (*diff_ref)->modified_start = modified_sync - 1;
+ (*diff_ref)->modified_length = common_length;
+ (*diff_ref)->latest_start = latest_sync - 1;
+ (*diff_ref)->latest_length = common_length;
+ (*diff_ref)->resolved_diff = NULL;
+
+ diff_ref = &(*diff_ref)->next;
+
+ /* Set the new offsets */
+ original_start = original_sync + common_length;
+ modified_start = modified_sync + common_length;
+ latest_start = latest_sync + common_length;
+
+ /* Make it easier for diff_common/conflict detection
+ by recording last lcs start positions
+ */
+ if (position_list[1]->offset < lcs_om->position[1]->offset)
+ position_list[1] = lcs_om->position[1];
+
+ if (position_list[2]->offset < lcs_ol->position[1]->offset)
+ position_list[2] = lcs_ol->position[1];
+
+ /* Make sure we are pointing to lcs entries beyond
+ * the range we just processed
+ */
+ while (original_start >= lcs_om->position[0]->offset + lcs_om->length
+ && lcs_om->length > 0)
+ {
+ lcs_om = lcs_om->next;
+ }
+
+ while (original_start >= lcs_ol->position[0]->offset + lcs_ol->length
+ && lcs_ol->length > 0)
+ {
+ lcs_ol = lcs_ol->next;
+ }
+ }
+
+ *diff_ref = NULL;
+ }
+
+ svn_pool_destroy(subpool);
+
+ return SVN_NO_ERROR;
+}
diff --git a/subversion/libsvn_diff/diff4.c b/subversion/libsvn_diff/diff4.c
new file mode 100644
index 000000000000..9f3cb8cd2c93
--- /dev/null
+++ b/subversion/libsvn_diff/diff4.c
@@ -0,0 +1,314 @@
+/*
+ * diff4.c : routines for doing variance-adjusted (4-way) diffs
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#include <apr.h>
+#include <apr_pools.h>
+#include <apr_general.h>
+
+#include "svn_pools.h"
+#include "svn_error.h"
+#include "svn_diff.h"
+#include "svn_types.h"
+
+#include "diff.h"
+
+/*
+ * Variance adjustment rules:
+ *
+ * See notes/variance-adjusted-patching.html
+ *
+ * ###: Expand this comment to contain the full set of adjustment
+ * ###: rules instead of pointing to a webpage.
+ */
+
+/*
+ * In the text below consider the following:
+ *
+ * O = Original
+ * M = Modified
+ * L = Latest
+ * A = Ancestor
+ * X:Y = diff between X and Y
+ * X:Y:Z = 3-way diff between X, Y and Z
+ * P = O:L, possibly adjusted
+ *
+ * diff4 -- Variance adjusted diff algorithm
+ *
+ * 1. Create a diff O:L and call that P.
+ *
+ * 2. Morph P into a 3-way diff by performing the following
+ * transformation: O:L -> O:O:L.
+ *
+ * 3. Create a diff A:O.
+ *
+ * 4. Using A:O...
+ *
+ * #. Using M:A...
+ *
+ * #. Resolve conflicts...
+ *
+
+ 1. Out-range added line: decrement the line numbers in every hunk in P
+ that comes after the addition. This undoes the effect of the add, since
+ the add never happened in D.
+
+ 2. Out-range deleted line: increment the line numbers in every hunk in P
+ that comes after the deletion. This undoes the effect of the deletion,
+ since the deletion never happened in D.
+
+ 3. Out-range edited line: do nothing. Out-range edits are irrelevant to P.
+
+ 4. Added line in context range in P: remove the corresponding line from
+ the context, optionally replacing it with new context based on that
+ region in M, and adjust line numbers and mappings appropriately.
+
+ 5. Added line in affected text range in P: this is a dependency problem
+ -- part of the change T:18-T:19 depends on changes introduced to T after
+ B branched. There are several possible behaviors, depending on what the
+ user wants. One is to generate an informative error, stating that
+ T:18-T:19 depends on some other change (T:N-T:M, where N>=8, M<=18,
+ and M-N == 1); the exact revisions can be discovered automatically using
+ the same process as "cvs annotate", though it may take some time to do
+ so. Another option is to include the change in P, as an insertion of the
+ "after" version of the text, and adjust line numbers and mappings
+ accordingly. (And if all this isn't sounding a lot like a directory
+ merge algorithm, try drinking more of the Kool-Aid.) A third option is
+ to include it as an insertion, but with metadata (such as CVS-style
+ conflict markers) indicating that the line attempting to be patched
+ does not exist in B.
+
+ 6. Deleted line that is in-range in P: request another universe -- this
+ situation can't happen in ours.
+
+ 7. In-range edited line: reverse that edit in the "before" version of the
+ corresponding line in the appropriate hunk in P, to obtain the version of
+ the line that will be found in B when P is applied.
+*/
+
+
+/* Walk the hunk list ADJUST and, for every entry that changes the line
+ * count, patch the "modified" range of each hunk in DIFF in place.
+ * The bracketed numbers refer to the variance adjustment rules listed in
+ * the comment preceding this function.
+ */
+static void
+adjust_diff(svn_diff_t *diff, svn_diff_t *adjust)
+{
+  svn_diff_t *adj;
+
+  for (adj = adjust; adj != NULL; adj = adj->next)
+    {
+      /* [first, past_last) is the modified-side range of this entry. */
+      const apr_off_t first = adj->modified_start;
+      const apr_off_t past_last = first + adj->modified_length;
+      /* Net number of lines the entry removes (> 0) or adds (< 0). */
+      const apr_off_t delta = adj->original_length - adj->modified_length;
+      svn_diff_t *cur;
+
+      /* Same line count on both sides: nothing to shift. [3, 7] */
+      if (delta != 0)
+        {
+          for (cur = diff; cur != NULL; cur = cur->next)
+            {
+              if (cur->modified_start >= past_last)
+                {
+                  /* The change lies entirely before this hunk, so only the
+                   * hunk's start moves. [1, 2]
+                   */
+                  cur->modified_start += delta;
+                }
+              else if (cur->modified_start + cur->modified_length > first)
+                {
+                  /* The change overlaps this hunk.  (Hunks that end at or
+                   * before FIRST fall through untouched. [1, 2])
+                   */
+                  if (cur->type == svn_diff__type_diff_modified)
+                    {
+                      /* Context hunk: adjust its length. [4] */
+                      cur->modified_length += delta;
+                    }
+                  else
+                    {
+                      /* A line added in range (reverse case) or deleted in
+                       * range (forward case) makes the hunk a conflict.
+                       * [5 (reverse), 6 (forward)]
+                       */
+                      if (delta < 0)
+                        cur->type = svn_diff__type_conflict;
+
+                      /* Reverse the change in the hunk's length. [5, 6] */
+                      cur->modified_length -= delta;
+                    }
+                }
+            }
+        }
+    }
+}
+
+/* Compute a variance-adjusted diff over the four datasources original,
+ * modified, latest and ancestor, accessed through VTABLE with DIFF_BATON.
+ *
+ * *DIFF is set to NULL on entry and, on success, to the resulting hunk
+ * list, which is allocated in POOL.  If any vtable or tokenizing call
+ * fails, its error is returned and *DIFF stays NULL.
+ *
+ * Three nested scratch pools are used:
+ *   subpool  (child of POOL)     - position lists for modified and latest,
+ *                                  and all token counts;
+ *   subpool2 (child of subpool)  - position lists for original and ancestor;
+ *   subpool3 (child of subpool2) - the token tree, then each LCS in turn.
+ */
+svn_error_t *
+svn_diff_diff4_2(svn_diff_t **diff,
+ void *diff_baton,
+ const svn_diff_fns2_t *vtable,
+ apr_pool_t *pool)
+{
+ svn_diff__tree_t *tree;
+ svn_diff__position_t *position_list[4];
+ svn_diff__token_index_t num_tokens;
+ svn_diff__token_index_t *token_counts[4];
+ svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
+ svn_diff_datasource_modified,
+ svn_diff_datasource_latest,
+ svn_diff_datasource_ancestor};
+ svn_diff__lcs_t *lcs_ol;
+ svn_diff__lcs_t *lcs_adjust;
+ svn_diff_t *diff_ol;
+ svn_diff_t *diff_adjust;
+ svn_diff_t *hunk;
+ apr_pool_t *subpool;
+ apr_pool_t *subpool2;
+ apr_pool_t *subpool3;
+ apr_off_t prefix_lines = 0;
+ apr_off_t suffix_lines = 0;
+
+ *diff = NULL;
+
+ subpool = svn_pool_create(pool);
+ subpool2 = svn_pool_create(subpool);
+ subpool3 = svn_pool_create(subpool2);
+
+ svn_diff__tree_create(&tree, subpool3);
+
+ /* Open all four datasources at once; the callee reports the number of
+ * identical prefix and suffix lines through the two out parameters. */
+ SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
+ datasource, 4));
+
+ /* Tokenize each datasource.  position_list[] is indexed in the same
+ * order as datasource[]: 0 = original, 1 = modified, 2 = latest,
+ * 3 = ancestor. */
+ SVN_ERR(svn_diff__get_tokens(&position_list[0],
+ tree,
+ diff_baton, vtable,
+ svn_diff_datasource_original,
+ prefix_lines,
+ subpool2));
+
+ SVN_ERR(svn_diff__get_tokens(&position_list[1],
+ tree,
+ diff_baton, vtable,
+ svn_diff_datasource_modified,
+ prefix_lines,
+ subpool));
+
+ SVN_ERR(svn_diff__get_tokens(&position_list[2],
+ tree,
+ diff_baton, vtable,
+ svn_diff_datasource_latest,
+ prefix_lines,
+ subpool));
+
+ SVN_ERR(svn_diff__get_tokens(&position_list[3],
+ tree,
+ diff_baton, vtable,
+ svn_diff_datasource_ancestor,
+ prefix_lines,
+ subpool2));
+
+ num_tokens = svn_diff__get_node_count(tree);
+
+ /* Get rid of the tokens, we don't need them to calc the diff */
+ if (vtable->token_discard_all != NULL)
+ vtable->token_discard_all(diff_baton);
+
+ /* We don't need the nodes in the tree either anymore, nor the tree itself */
+ svn_pool_clear(subpool3);
+
+ token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
+ subpool);
+ token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
+ subpool);
+ token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
+ subpool);
+ token_counts[3] = svn_diff__get_token_counts(position_list[3], num_tokens,
+ subpool);
+
+ /* Get the lcs for original - latest */
+ lcs_ol = svn_diff__lcs(position_list[0], position_list[2],
+ token_counts[0], token_counts[2],
+ num_tokens, prefix_lines,
+ suffix_lines, subpool3);
+ /* The resulting hunk list is what we eventually return, so it is
+ * allocated in POOL rather than in a scratch pool. */
+ diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool);
+
+ svn_pool_clear(subpool3);
+
+ /* Morph the 2-way diff O:L into the 3-way form O:O:L: the range reported
+ * as "modified" becomes the "latest" range, and the "modified" range is
+ * set equal to the "original" range.  Hunks typed diff_modified become
+ * diff_latest; every other hunk is retyped diff_modified, which is the
+ * type adjust_diff() treats as a context hunk. */
+ for (hunk = diff_ol; hunk; hunk = hunk->next)
+ {
+ hunk->latest_start = hunk->modified_start;
+ hunk->latest_length = hunk->modified_length;
+ hunk->modified_start = hunk->original_start;
+ hunk->modified_length = hunk->original_length;
+
+ if (hunk->type == svn_diff__type_diff_modified)
+ hunk->type = svn_diff__type_diff_latest;
+ else
+ hunk->type = svn_diff__type_diff_modified;
+ }
+
+ /* Get the lcs for common ancestor - original
+ * Do reverse adjustments
+ *
+ * NOTE(review): the call below passes position_list[3] and
+ * position_list[2], i.e. the ancestor and *latest* lists, not the
+ * original list this comment names -- confirm which one is intended.
+ */
+ lcs_adjust = svn_diff__lcs(position_list[3], position_list[2],
+ token_counts[3], token_counts[2],
+ num_tokens, prefix_lines,
+ suffix_lines, subpool3);
+ diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
+ adjust_diff(diff_ol, diff_adjust);
+
+ svn_pool_clear(subpool3);
+
+ /* Get the lcs for modified - common ancestor
+ * Do forward adjustments
+ */
+ lcs_adjust = svn_diff__lcs(position_list[1], position_list[3],
+ token_counts[1], token_counts[3],
+ num_tokens, prefix_lines,
+ suffix_lines, subpool3);
+ diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
+ adjust_diff(diff_ol, diff_adjust);
+
+ /* Get rid of the position lists for original and ancestor, and delete
+ * our scratchpool.  (subpool3 is a child of subpool2, so it goes too.)
+ */
+ svn_pool_destroy(subpool2);
+
+ /* Now we try and resolve the conflicts we encountered.  The modified and
+ * latest position lists live in subpool and are therefore still valid. */
+ for (hunk = diff_ol; hunk; hunk = hunk->next)
+ {
+ if (hunk->type == svn_diff__type_conflict)
+ {
+ svn_diff__resolve_conflict(hunk, &position_list[1],
+ &position_list[2], num_tokens, pool);
+ }
+ }
+
+ svn_pool_destroy(subpool);
+
+ *diff = diff_ol;
+
+ return SVN_NO_ERROR;
+}
diff --git a/subversion/libsvn_diff/diff_file.c b/subversion/libsvn_diff/diff_file.c
new file mode 100644
index 000000000000..e70c2f98a15d
--- /dev/null
+++ b/subversion/libsvn_diff/diff_file.c
@@ -0,0 +1,2414 @@
+/*
+ * diff_file.c : routines for doing diffs on files
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#include <apr.h>
+#include <apr_pools.h>
+#include <apr_general.h>
+#include <apr_file_io.h>
+#include <apr_file_info.h>
+#include <apr_time.h>
+#include <apr_mmap.h>
+#include <apr_getopt.h>
+
+#include "svn_error.h"
+#include "svn_diff.h"
+#include "svn_types.h"
+#include "svn_string.h"
+#include "svn_subst.h"
+#include "svn_io.h"
+#include "svn_utf.h"
+#include "svn_pools.h"
+#include "diff.h"
+#include "svn_private_config.h"
+#include "svn_path.h"
+#include "svn_ctype.h"
+
+#include "private/svn_utf_private.h"
+#include "private/svn_eol_private.h"
+#include "private/svn_dep_compat.h"
+#include "private/svn_adler32.h"
+#include "private/svn_diff_private.h"
+
+/* A token, i.e. a line read from a file. */
+typedef struct svn_diff__file_token_t
+{
+ /* Next token in free list. */
+ struct svn_diff__file_token_t *next;
+ /* Datasource of this token -- presumably the one the line was read
+ * from; confirm against the code that fills it in. */
+ svn_diff_datasource_e datasource;
+ /* Offset in the datasource. */
+ apr_off_t offset;
+ /* Offset of the normalized token (may skip leading whitespace) */
+ apr_off_t norm_offset;
+ /* Total length - before normalization. */
+ apr_off_t raw_length;
+ /* Total length - after normalization. */
+ apr_off_t length;
+} svn_diff__file_token_t;
+
+
+/* State for diffing files: the diff options, per-file read state for up
+ * to four files, and a free list of tokens. */
+typedef struct svn_diff__file_baton_t
+{
+ /* Diff options in effect; never modified through this pointer. */
+ const svn_diff_file_options_t *options;
+
+ /* Per-file state.  NOTE(review): presumably indexed by the value that
+ * datasource_to_index() returns (0..3) -- confirm against the users. */
+ struct file_info {
+ const char *path; /* path to this file, absolute or relative to CWD */
+
+ /* All the following fields are active while this datasource is open */
+ apr_file_t *file; /* handle of this file */
+ apr_off_t size; /* total raw size in bytes of this file */
+
+ /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
+ int chunk; /* the current chunk number, zero-based */
+ char *buffer; /* a buffer containing the current chunk */
+ char *curp; /* current position in the current chunk */
+ char *endp; /* next memory address after the current chunk */
+
+ svn_diff__normalize_state_t normalize_state;
+
+ /* Where the identical suffix starts in this datasource */
+ int suffix_start_chunk;
+ apr_off_t suffix_offset_in_chunk;
+ } files[4];
+
+ /* List of free tokens that may be reused. */
+ svn_diff__file_token_t *tokens;
+
+ /* Pool associated with this baton (presumably used for its
+ * allocations -- confirm against the users). */
+ apr_pool_t *pool;
+} svn_diff__file_baton_t;
+
+/* Map DATASOURCE to a small integer: original -> 0, modified -> 1,
+ * latest -> 2, ancestor -> 3.  Any other value yields -1.
+ */
+static int
+datasource_to_index(svn_diff_datasource_e datasource)
+{
+  if (datasource == svn_diff_datasource_original)
+    return 0;
+
+  if (datasource == svn_diff_datasource_modified)
+    return 1;
+
+  if (datasource == svn_diff_datasource_latest)
+    return 2;
+
+  if (datasource == svn_diff_datasource_ancestor)
+    return 3;
+
+  /* Not one of the four known datasources. */
+  return -1;
+}
+
+/* Files are read in chunks of 128k. This number has no particular
+ * justification. If someone comes up with a number that has some
+ * argumentation behind it, let's use that.
+ */
+/* If you change this number, update test_norm_offset(),
+ * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
+ */
+#define CHUNK_SHIFT 17
+#define CHUNK_SIZE (1 << CHUNK_SHIFT)
+
+/* Conversions between chunk numbers and byte offsets.
+ *
+ * chunk_to_offset() widens its argument to apr_off_t before shifting:
+ * chunk numbers are kept in plain ints, and shifting an int left by
+ * CHUNK_SHIFT overflows (undefined behaviour for signed types) as soon as
+ * the resulting offset no longer fits in an int, i.e. for offsets of 2GB
+ * and beyond where int is 32 bits wide.
+ */
+#define chunk_to_offset(chunk) (((apr_off_t) (chunk)) << CHUNK_SHIFT)
+#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
+#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
+
+
+/* Seek FILE to byte OFFSET and read LENGTH bytes from there into BUFFER.
+ * POOL is passed on to the svn_io calls.  PATH is not used by this
+ * function.
+ *
+ * LENGTH is passed by value, so the number of bytes actually read is not
+ * reported back to the caller; svn_io_file_read_full2() is called with
+ * NULL for its two trailing pointer arguments.
+ */
+static APR_INLINE svn_error_t *
+read_chunk(apr_file_t *file, const char *path,
+           char *buffer, apr_off_t length,
+           apr_off_t offset, apr_pool_t *pool)
+{
+  apr_off_t seek_pos = offset;
+  const apr_size_t nbytes = (apr_size_t) length;
+
+  /* XXX: The final offset may not be the one we asked for.
+   * XXX: Check.
+   */
+  SVN_ERR(svn_io_file_seek(file, APR_SET, &seek_pos, pool));
+
+  return svn_io_file_read_full2(file, buffer, nbytes, NULL, NULL, pool);
+}
+
+
+/* Map or read a file at PATH. *BUFFER will point to the file
+ * contents, *SIZE will contain the file size. Allocate from POOL.
+ *
+ * The file is opened for reading into *FILE. Then:
+ * - If APR has mmap support and the file is larger than
+ * APR_MMAP_THRESHOLD, mapping is attempted; on success *BUFFER points
+ * into the mapping, *MM holds the mmap context and *FILE stays open.
+ * - Otherwise (or if mapping failed) a non-empty file is read completely
+ * into a buffer allocated from POOL, after which the file is closed
+ * and *FILE is set to NULL.
+ * - For an empty file *BUFFER stays NULL and *FILE is left open.
+ *
+ * NOTE(review): *MM is only written by apr_mmap_create(), i.e. only when
+ * mapping is attempted; this function does not itself set it to NULL, so
+ * callers presumably pre-initialize it -- confirm.
+ *
+ * The MMAP_T_PARAM/MMAP_T_ARG macros make the mmap parameter (and the
+ * matching call argument) disappear when APR_HAS_MMAP is not set.
+ */
+#if APR_HAS_MMAP
+#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
+#define MMAP_T_ARG(NAME) &(NAME),
+#else
+#define MMAP_T_PARAM(NAME)
+#define MMAP_T_ARG(NAME)
+#endif
+
+static svn_error_t *
+map_or_read_file(apr_file_t **file,
+ MMAP_T_PARAM(mm)
+ char **buffer, apr_off_t *size,
+ const char *path, apr_pool_t *pool)
+{
+ apr_finfo_t finfo;
+ apr_status_t rv;
+
+ /* NULL doubles as the "contents not available yet" marker below. */
+ *buffer = NULL;
+
+ SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
+ SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
+
+#if APR_HAS_MMAP
+ if (finfo.size > APR_MMAP_THRESHOLD)
+ {
+ rv = apr_mmap_create(mm, *file, 0, (apr_size_t) finfo.size,
+ APR_MMAP_READ, pool);
+ if (rv == APR_SUCCESS)
+ {
+ *buffer = (*mm)->mm;
+ }
+
+ /* On failure we just fall through and try reading the file into
+ * memory instead.
+ */
+ }
+#endif /* APR_HAS_MMAP */
+
+ /* Not mapped (too small, mapping failed, or no mmap support): read the
+ * whole file, unless it is empty. */
+ if (*buffer == NULL && finfo.size > 0)
+ {
+ *buffer = apr_palloc(pool, (apr_size_t) finfo.size);
+
+ SVN_ERR(svn_io_file_read_full2(*file, *buffer, (apr_size_t) finfo.size,
+ NULL, NULL, pool));
+
+ /* Since we have the entire contents of the file we can
+ * close it now.
+ */
+ SVN_ERR(svn_io_file_close(*file, pool));
+
+ *file = NULL;
+ }
+
+ *size = finfo.size;
+
+ return SVN_NO_ERROR;
+}
+
+
+/* For all files in the FILE array, increment the curp pointer. If a file
+ * points before the beginning of file, let it point at the first byte again.
+ * If the end of the current chunk is reached, read the next chunk in the
+ * buffer and point curp to the start of the chunk. If EOF is reached, set
+ * curp equal to endp to indicate EOF.
+ *
+ * Everything except the plain "curp++" case is delegated to
+ * increment_chunk(). Because the expansion contains SVN_ERR(), this macro
+ * may only be used inside a function returning svn_error_t *, and it
+ * returns from that function if increment_chunk() fails. */
+#define INCREMENT_POINTERS(all_files, files_len, pool) \
+ do { \
+ apr_size_t svn_macro__i; \
+ \
+ for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \
+ { \
+ if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
+ (all_files)[svn_macro__i].curp++; \
+ else \
+ SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool))); \
+ } \
+ } while (0)
+
+
+/* For all files in the FILE array, decrement the curp pointer. If the
+ * start of a chunk is reached, read the previous chunk in the buffer and
+ * point curp to the last byte of the chunk. If the beginning of a FILE is
+ * reached, set chunk to -1 to indicate BOF.
+ *
+ * Everything except the plain "curp--" case is delegated to
+ * decrement_chunk(). Because the expansion contains SVN_ERR(), this macro
+ * may only be used inside a function returning svn_error_t *, and it
+ * returns from that function if decrement_chunk() fails. */
+#define DECREMENT_POINTERS(all_files, files_len, pool) \
+ do { \
+ apr_size_t svn_macro__i; \
+ \
+ for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \
+ { \
+ if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
+ (all_files)[svn_macro__i].curp--; \
+ else \
+ SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool))); \
+ } \
+ } while (0)
+
+
+/* Move FILE forward across a chunk boundary.
+ *
+ * - At BOF (chunk == -1): point at the first byte of the first chunk.
+ * - In the last chunk: signal EOF by setting curp equal to endp.
+ * - Otherwise: read the next chunk into the buffer and point curp at its
+ *   first byte; endp is set to the end of the data just read.
+ *
+ * POOL is passed on to read_chunk().
+ */
+static svn_error_t *
+increment_chunk(struct file_info *file, apr_pool_t *pool)
+{
+  const apr_off_t final_chunk = offset_to_chunk(file->size);
+  apr_off_t nbytes;
+
+  if (file->chunk == -1)
+    {
+      /* We are at BOF (Beginning Of File): the buffer is taken to still
+         hold the first chunk, so no read is issued here. */
+      file->chunk = 0;
+      file->curp = file->buffer;
+      return SVN_NO_ERROR;
+    }
+
+  if (file->chunk == final_chunk)
+    {
+      /* Nothing left to read; curp == endp is the EOF marker. */
+      file->curp = file->endp;
+      return SVN_NO_ERROR;
+    }
+
+  /* There are still chunks left.  Only the last chunk may be shorter than
+     CHUNK_SIZE. */
+  file->chunk++;
+  if (file->chunk == final_chunk)
+    nbytes = offset_in_chunk(file->size);
+  else
+    nbytes = CHUNK_SIZE;
+
+  SVN_ERR(read_chunk(file->file, file->path, file->buffer,
+                     nbytes, chunk_to_offset(file->chunk),
+                     pool));
+  file->endp = file->buffer + nbytes;
+  file->curp = file->buffer;
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Move FILE backward across a chunk boundary.
+ *
+ * If FILE is in its first chunk, signal BOF (Beginning Of File) by setting
+ * chunk to -1 and curp to endp - 1.  Both settings matter: they let the
+ * increment step detect the BOF situation cheaply.  Otherwise read the
+ * previous (always full-sized) chunk and point curp at its last byte.
+ *
+ * POOL is passed on to read_chunk().
+ */
+static svn_error_t *
+decrement_chunk(struct file_info *file, apr_pool_t *pool)
+{
+  const svn_boolean_t was_first_chunk = (file->chunk == 0);
+
+  file->chunk--;
+
+  if (!was_first_chunk)
+    {
+      /* Load the preceding chunk; it is never the last one, hence it is
+         exactly CHUNK_SIZE bytes long. */
+      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
+                         CHUNK_SIZE, chunk_to_offset(file->chunk),
+                         pool));
+      file->endp = file->buffer + CHUNK_SIZE;
+    }
+
+  /* In both cases curp ends up on the last byte of the buffered chunk. */
+  file->curp = file->endp - 1;
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Return TRUE if at least one of the FILE_LEN entries of FILE is positioned
+ * 'before' the beginning of its file, which is signalled by chunk == -1
+ * (this can happen while scanning backwards).  Return FALSE otherwise. */
+static svn_boolean_t
+is_one_at_bof(struct file_info file[], apr_size_t file_len)
+{
+  const struct file_info *cur = file;
+  const struct file_info *const stop = file + file_len;
+
+  while (cur != stop)
+    {
+      if (cur->chunk == -1)
+        return TRUE;
+
+      cur++;
+    }
+
+  return FALSE;
+}
+
+/* Return TRUE if at least one of the FILE_LEN entries of FILE is at EOF,
+ * which is signalled by curp == endp (this can only happen at the last
+ * chunk).  Return FALSE otherwise. */
+static svn_boolean_t
+is_one_at_eof(struct file_info file[], apr_size_t file_len)
+{
+  apr_size_t idx = 0;
+  svn_boolean_t found = FALSE;
+
+  while (!found && idx < file_len)
+    {
+      found = (file[idx].curp == file[idx].endp);
+      idx++;
+    }
+
+  return found;
+}
+
+/* Quickly determine whether there is a eol char ('\r' or '\n') in the
+ * machine word CHUNK.
+ * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
+ *
+ * NOTE(review): the explanation in the body assumes SVN__R_MASK and
+ * SVN__N_MASK repeat '\r' resp. '\n' in every byte, and that
+ * SVN__LOWER_7BITS_SET / SVN__BIT_7_SET select the low seven bits resp.
+ * the top bit of every byte -- confirm against their definitions.
+ */
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+static svn_boolean_t contains_eol(apr_uintptr_t chunk)
+{
+  /* After the XOR, a byte is zero exactly where CHUNK matched the mask. */
+  const apr_uintptr_t r_diff = chunk ^ SVN__R_MASK;
+  const apr_uintptr_t n_diff = chunk ^ SVN__N_MASK;
+
+  /* Fold each byte's low seven bits into its top bit. */
+  const apr_uintptr_t r_marks
+    = r_diff | ((r_diff & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET);
+  const apr_uintptr_t n_marks
+    = n_diff | ((n_diff & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET);
+
+  /* All top bits set in both words means no byte matched either mask. */
+  if ((r_marks & n_marks & SVN__BIT_7_SET) == SVN__BIT_7_SET)
+    return FALSE;
+
+  return TRUE;
+}
+#endif
+
+/* Find the prefix which is identical between all elements of the FILE array.
+ * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be
+ * set to TRUE if one of the FILEs reached its end while scanning prefix,
+ * i.e. at least one file consisted entirely of prefix. Otherwise,
+ * REACHED_ONE_EOF is set to FALSE.
+ *
+ * After this function is finished, the buffers, chunks, curp's and endp's
+ * of the FILEs are set to point at the first byte after the prefix.
+ *
+ * POOL is passed on to the chunk-reading helpers. Any error from reading a
+ * chunk is returned to the caller.
+ *
+ * NOTE(review): the very first comparison dereferences curp of every file
+ * without an EOF check, so this presumably expects each file to be
+ * non-empty on entry -- confirm against the caller. */
+static svn_error_t *
+find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
+ struct file_info file[], apr_size_t file_len,
+ apr_pool_t *pool)
+{
+ svn_boolean_t had_cr = FALSE;
+ svn_boolean_t is_match;
+ apr_off_t lines = 0;
+ apr_size_t i;
+
+ *reached_one_eof = FALSE;
+
+ /* Do all files agree on their current byte? */
+ for (i = 1, is_match = TRUE; i < file_len; i++)
+ is_match = is_match && *file[0].curp == *file[i].curp;
+ while (is_match)
+ {
+#if SVN_UNALIGNED_ACCESS_IS_OK
+ apr_ssize_t max_delta, delta;
+#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
+
+ /* ### TODO: see if we can take advantage of
+ diff options like ignore_eol_style or ignore_space. */
+ /* check for eol, and count */
+ /* A '\r' always counts as a line end; a '\n' counts only when it does
+ not directly follow a '\r', so that "\r\n" is counted once. */
+ if (*file[0].curp == '\r')
+ {
+ lines++;
+ had_cr = TRUE;
+ }
+ else if (*file[0].curp == '\n' && !had_cr)
+ {
+ lines++;
+ }
+ else
+ {
+ had_cr = FALSE;
+ }
+
+ INCREMENT_POINTERS(file, file_len, pool);
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+ /* Try to advance as far as possible with machine-word granularity.
+ * Determine how far we may advance with chunky ops without reaching
+ * endp for any of the files.
+ * Signedness is important here if curp gets close to endp.
+ */
+ max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
+ for (i = 1; i < file_len; i++)
+ {
+ delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
+ if (delta < max_delta)
+ max_delta = delta;
+ }
+
+ /* Compare one machine word at a time; stop at the first word that
+ contains an eol char or differs between the files. */
+ is_match = TRUE;
+ for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
+ {
+ apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
+ if (contains_eol(chunk))
+ break;
+
+ for (i = 1; i < file_len; i++)
+ if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
+ {
+ is_match = FALSE;
+ break;
+ }
+
+ if (! is_match)
+ break;
+ }
+
+ if (delta /* > 0*/)
+ {
+ /* We either found a mismatch or an EOL at or shortly behind curp+delta
+ * or we cannot proceed with chunky ops without exceeding endp.
+ * In any way, everything up to curp + delta is equal and not an EOL.
+ */
+ for (i = 0; i < file_len; i++)
+ file[i].curp += delta;
+
+ /* Skipped data without EOL markers, so last char was not a CR. */
+ had_cr = FALSE;
+ }
+#endif
+
+ /* Stop at EOF of any file; otherwise re-evaluate the match byte by
+ byte at the new position. */
+ *reached_one_eof = is_one_at_eof(file, file_len);
+ if (*reached_one_eof)
+ break;
+ else
+ for (i = 1, is_match = TRUE; i < file_len; i++)
+ is_match = is_match && *file[0].curp == *file[i].curp;
+ }
+
+ if (had_cr)
+ {
+ /* Check if we ended in the middle of a \r\n for one file, but \r for
+ another. If so, back up one byte, so the next loop will back up
+ the entire line. Also decrement lines, since we counted one
+ too many for the \r. */
+ svn_boolean_t ended_at_nonmatching_newline = FALSE;
+ for (i = 0; i < file_len; i++)
+ if (file[i].curp < file[i].endp)
+ ended_at_nonmatching_newline = ended_at_nonmatching_newline
+ || *file[i].curp == '\n';
+ if (ended_at_nonmatching_newline)
+ {
+ lines--;
+ DECREMENT_POINTERS(file, file_len, pool);
+ }
+ }
+
+ /* Back up one byte, so we point at the last identical byte */
+ DECREMENT_POINTERS(file, file_len, pool);
+
+ /* Back up to the last eol sequence (\n, \r\n or \r) */
+ while (!is_one_at_bof(file, file_len) &&
+ *file[0].curp != '\n' && *file[0].curp != '\r')
+ DECREMENT_POINTERS(file, file_len, pool);
+
+ /* Slide one byte forward, to point past the eol sequence */
+ INCREMENT_POINTERS(file, file_len, pool);
+
+ *prefix_lines = lines;
+
+ return SVN_NO_ERROR;
+}
+
+
+/* The number of identical suffix lines to keep with the middle section. These
+ * lines are not eliminated as suffix, and can be picked up by the token
+ * parsing and lcs steps. This is mainly for backward compatibility with
+ * the previous diff (and blame) output (if there are multiple diff solutions,
+ * our lcs algorithm prefers taking common lines from the start, rather than
+ * from the end. By giving it back some suffix lines, we give it some wiggle
+ * room to find the exact same diff as before).
+ *
+ * The number 50 is more or less arbitrary, based on some real-world tests
+ * with big files (and then doubling the required number to be on the safe
+ * side). This has a negligible effect on the power of the optimization.
+ *
+ * The #ifndef guard lets a definition made before this point (for example
+ * on the compiler command line) take precedence over the default. */
+/* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
+#ifndef SUFFIX_LINES_TO_KEEP
+#define SUFFIX_LINES_TO_KEEP 50
+#endif
+
+/* Find the suffix which is identical between all elements of the FILE array.
+ * Return the number of suffix lines in SUFFIX_LINES.
+ *
+ * Before this function is called the FILEs' pointers and chunks should be
+ * positioned right after the identical prefix (which is the case after
+ * find_identical_prefix), so we can determine where suffix scanning should
+ * ultimately stop. */
+static svn_error_t *
+find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
+ apr_size_t file_len, apr_pool_t *pool)
+{ /* Scan the FILE_LEN files in FILE backwards from their ends, comparing them
+     byte by byte against FILE[0], to find the trailing region they all share.
+     Record where the region to be skipped starts in each
+     FILE[i].suffix_start_chunk / FILE[i].suffix_offset_in_chunk and store the
+     number of lines it contains in *SUFFIX_LINES.  Scan buffers that cannot
+     be shared with the prefix scan are allocated in POOL. */
+ struct file_info file_for_suffix[4] = { { 0 } }; /* private scan cursors; FILE[] itself is only
+                                                     written at the very end */
+ apr_off_t length[4]; /* number of bytes held by each file's last non-empty chunk */
+ apr_off_t suffix_min_chunk0; /* chunk of file[0] at which the backward scan must stop */
+ apr_off_t suffix_min_offset0; /* offset inside that chunk at which it must stop */
+ apr_off_t min_file_size;
+ int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
+ svn_boolean_t is_match;
+ apr_off_t lines = 0; /* eol sequences counted so far in the identical suffix */
+ svn_boolean_t had_cr;
+ svn_boolean_t had_nl; /* TRUE when the byte examined in the previous iteration
+                          (the next byte in file order) was a counted '\n' */
+ apr_size_t i;
+
+ /* Initialize file_for_suffix[].
+ Read last chunk, position curp at last byte. */
+ for (i = 0; i < file_len; i++)
+ {
+ file_for_suffix[i].path = file[i].path;
+ file_for_suffix[i].file = file[i].file;
+ file_for_suffix[i].size = file[i].size;
+ file_for_suffix[i].chunk =
+ (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
+ length[i] = offset_in_chunk(file_for_suffix[i].size);
+ if (length[i] == 0)
+ {
+ /* last chunk is an empty chunk -> start at next-to-last chunk */
+ file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
+ length[i] = CHUNK_SIZE;
+ }
+
+ if (file_for_suffix[i].chunk == file[i].chunk)
+ {
+ /* Prefix ended in last chunk, so we can reuse the prefix buffer */
+ file_for_suffix[i].buffer = file[i].buffer;
+ }
+ else
+ {
+ /* There is at least more than 1 chunk,
+ so allocate full chunk size buffer */
+ file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
+ SVN_ERR(read_chunk(file_for_suffix[i].file, file_for_suffix[i].path,
+ file_for_suffix[i].buffer, length[i],
+ chunk_to_offset(file_for_suffix[i].chunk),
+ pool));
+ }
+ file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
+ file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
+ }
+
+ /* Get the chunk and pointer offset (for file[0]) at which we should stop
+ scanning backward for the identical suffix, i.e. when we reach prefix. */
+ suffix_min_chunk0 = file[0].chunk;
+ suffix_min_offset0 = file[0].curp - file[0].buffer;
+
+ /* Compensate if other files are smaller than file[0]: the stop position in
+ file[0] is moved forward by the size difference to the smallest file. */
+ for (i = 1, min_file_size = file[0].size; i < file_len; i++)
+ if (file[i].size < min_file_size)
+ min_file_size = file[i].size;
+ if (file[0].size > min_file_size)
+ {
+ suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
+ suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
+ }
+
+ /* Scan backwards until mismatch or until we reach the prefix.
+ The first comparison is of the very last byte of every file. */
+ for (i = 1, is_match = TRUE; i < file_len; i++)
+ is_match = is_match
+ && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
+ if (is_match && *file_for_suffix[0].curp != '\r'
+ && *file_for_suffix[0].curp != '\n')
+ /* Count an extra line for the last line not ending in an eol. */
+ lines++;
+
+ had_nl = FALSE;
+ while (is_match)
+ {
+ svn_boolean_t reached_prefix;
+#if SVN_UNALIGNED_ACCESS_IS_OK
+ /* Initialize the minimum pointer positions. */
+ const char *min_curp[4];
+ svn_boolean_t can_read_word;
+#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
+
+ /* ### TODO: see if we can take advantage of
+ diff options like ignore_eol_style or ignore_space. */
+ /* check for eol, and count.  Because the scan runs backwards, a '\r'
+ seen right after a counted '\n' is the first half of a "\r\n" pair and
+ must not be counted a second time. */
+ if (*file_for_suffix[0].curp == '\n')
+ {
+ lines++;
+ had_nl = TRUE;
+ }
+ else if (*file_for_suffix[0].curp == '\r' && !had_nl)
+ {
+ lines++;
+ }
+ else
+ {
+ had_nl = FALSE;
+ }
+
+ DECREMENT_POINTERS(file_for_suffix, file_len, pool); /* macro defined elsewhere in this file;
+                                                         presumably steps every curp back one
+                                                         byte, loading the previous chunk when
+                                                         needed - confirm at its definition */
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+ for (i = 0; i < file_len; i++)
+ min_curp[i] = file_for_suffix[i].buffer;
+
+ /* If we are in the same chunk that contains the last part of the common
+ prefix, use the min_curp[0] pointer to make sure we don't get a
+ suffix that overlaps the already determined common prefix. */
+ if (file_for_suffix[0].chunk == suffix_min_chunk0)
+ min_curp[0] += suffix_min_offset0;
+
+ /* Scan quickly by reading with machine-word granularity. */
+ for (i = 0, can_read_word = TRUE; i < file_len; i++)
+ can_read_word = can_read_word
+ && ( (file_for_suffix[i].curp + 1
+ - sizeof(apr_uintptr_t))
+ > min_curp[i]);
+ while (can_read_word)
+ {
+ apr_uintptr_t chunk;
+
+ /* For each file curp is positioned at the current byte, but we
+ want to examine the current byte and the ones before the current
+ location as one machine word. */
+
+ chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
+ - sizeof(apr_uintptr_t));
+ if (contains_eol(chunk))
+ break; /* leave words holding an eol to the byte-wise code so lines get counted */
+
+ for (i = 1, is_match = TRUE; i < file_len; i++)
+ is_match = is_match
+ && ( chunk
+ == *(const apr_uintptr_t *)
+ (file_for_suffix[i].curp + 1
+ - sizeof(apr_uintptr_t)));
+
+ if (! is_match)
+ break;
+
+ for (i = 0; i < file_len; i++)
+ {
+ file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
+ can_read_word = can_read_word
+ && ( (file_for_suffix[i].curp + 1
+ - sizeof(apr_uintptr_t))
+ > min_curp[i]);
+ }
+
+ /* We skipped some bytes, so there are no closing EOLs */
+ had_nl = FALSE;
+ had_cr = FALSE;
+ }
+
+ /* The > min_curp[i] check leaves at least one final byte for checking
+ in the non block optimized case below. */
+#endif
+
+ reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
+ && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
+ == suffix_min_offset0;
+ if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
+ break; /* is_match stays TRUE here: everything scanned so far was identical */
+
+ is_match = TRUE;
+ for (i = 1; i < file_len; i++)
+ is_match = is_match
+ && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
+ }
+
+ /* Slide one byte forward, to point at the first byte of identical suffix */
+ INCREMENT_POINTERS(file_for_suffix, file_len, pool);
+
+ /* Slide forward until we find an eol sequence to add the rest of the line
+ we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
+ one file reaches its end.  Every eol sequence passed on the way removes
+ one line from the count, because that line is no longer part of the
+ suffix that will be skipped. */
+ do
+ {
+ had_cr = FALSE;
+ while (!is_one_at_eof(file_for_suffix, file_len)
+ && *file_for_suffix[0].curp != '\n'
+ && *file_for_suffix[0].curp != '\r')
+ INCREMENT_POINTERS(file_for_suffix, file_len, pool);
+
+ /* Slide one or two more bytes, to point past the eol. */
+ if (!is_one_at_eof(file_for_suffix, file_len)
+ && *file_for_suffix[0].curp == '\r')
+ {
+ lines--;
+ had_cr = TRUE;
+ INCREMENT_POINTERS(file_for_suffix, file_len, pool);
+ }
+ if (!is_one_at_eof(file_for_suffix, file_len)
+ && *file_for_suffix[0].curp == '\n')
+ {
+ if (!had_cr) /* a "\r\n" pair was already accounted for by its '\r' */
+ lines--;
+ INCREMENT_POINTERS(file_for_suffix, file_len, pool);
+ }
+ }
+ while (!is_one_at_eof(file_for_suffix, file_len)
+ && suffix_lines_to_keep--);
+
+ if (is_one_at_eof(file_for_suffix, file_len))
+ lines = 0; /* sliding forward reached the end of a file, so no suffix lines are reported */
+
+ /* Save the final suffix information in the original file_info */
+ for (i = 0; i < file_len; i++)
+ {
+ file[i].suffix_start_chunk = file_for_suffix[i].chunk;
+ file[i].suffix_offset_in_chunk =
+ file_for_suffix[i].curp - file_for_suffix[i].buffer;
+ }
+
+ *suffix_lines = lines;
+
+ return SVN_NO_ERROR;
+}
+
+
+/* Implements svn_diff_fns2_t::datasources_open.
+ *
+ * BATON is a (svn_diff__file_baton_t *).  Each of the DATASOURCES_LEN
+ * entries of DATASOURCES selects, through datasource_to_index(), one element
+ * of BATON->files; call that element FILE.
+ *
+ * For every selected FILE: open FILE.path, store its size in FILE.size,
+ * allocate a buffer in BATON->pool, read the first chunk into it and set
+ * FILE.curp / FILE.endp to the start / end of the data read.
+ *
+ * Then, unless SVN_DISABLE_PREFIX_SUFFIX_SCANNING is defined, determine the
+ * lines that are identical in all files at their beginning and at their end
+ * and return the two counts in *PREFIX_LINES and *SUFFIX_LINES.  Excluding
+ * those lines shrinks the problem handed to the rest of the diff algorithm.
+ * Both counts are 0 when any of the files is empty. */
+static svn_error_t *
+datasources_open(void *baton,
+                 apr_off_t *prefix_lines,
+                 apr_off_t *suffix_lines,
+                 const svn_diff_datasource_e *datasources,
+                 apr_size_t datasources_len)
+{
+  svn_diff__file_baton_t *fb = baton;
+  struct file_info scratch[4];
+  apr_finfo_t finfo[4];
+  apr_off_t first_len[4];
+  svn_boolean_t saw_empty_file = FALSE;
+#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
+  svn_boolean_t reached_one_eof;
+#endif
+  apr_size_t n;
+
+  /* The outputs must be valid even on the early exit for empty files. */
+  *prefix_lines = 0;
+  *suffix_lines = 0;
+
+  /* Open every datasource and load its first chunk. */
+  for (n = 0; n < datasources_len; n++)
+    {
+      struct file_info *fi = &fb->files[datasource_to_index(datasources[n])];
+
+      SVN_ERR(svn_io_file_open(&fi->file, fi->path,
+                               APR_READ, APR_OS_DEFAULT, fb->pool));
+      SVN_ERR(svn_io_file_info_get(&finfo[n], APR_FINFO_SIZE,
+                                   fi->file, fb->pool));
+      fi->size = finfo[n].size;
+
+      /* The first chunk is the whole file or CHUNK_SIZE bytes, whichever
+         is smaller. */
+      if (finfo[n].size > CHUNK_SIZE)
+        first_len[n] = CHUNK_SIZE;
+      else
+        first_len[n] = finfo[n].size;
+
+      if (first_len[n] == 0)
+        saw_empty_file = TRUE;
+
+      fi->buffer = apr_palloc(fb->pool, (apr_size_t) first_len[n]);
+      SVN_ERR(read_chunk(fi->file, fi->path, fi->buffer,
+                         first_len[n], 0, fb->pool));
+      fi->curp = fi->buffer;
+      fi->endp = fi->buffer + first_len[n];
+
+      /* Guard value: when suffix scanning is skipped (an empty file, or
+         reached_one_eof), datasource_get_next_token can still test for the
+         suffix without special cases. */
+      fi->suffix_start_chunk = -1;
+
+      /* The scanners below work on a local copy. */
+      scratch[n] = *fi;
+    }
+
+  /* With an empty file there cannot be any identical prefix or suffix. */
+  if (saw_empty_file)
+    return SVN_NO_ERROR;
+
+#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
+
+  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
+                                scratch, datasources_len, fb->pool));
+
+  /* A suffix can only exist if no file consisted entirely of prefix. */
+  if (!reached_one_eof)
+    SVN_ERR(find_identical_suffix(suffix_lines, scratch, datasources_len,
+                                  fb->pool));
+
+#endif
+
+  /* Publish the scan results in the baton. */
+  for (n = 0; n < datasources_len; n++)
+    fb->files[datasource_to_index(datasources[n])] = scratch[n];
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Implements svn_diff_fns2_t::datasource_close.
+ *
+ * Deliberately a no-op: token comparison needs earlier datasources to stay
+ * available until every datasource has been processed, so nothing can be
+ * released per datasource.  BATON and DATASOURCE are unused. */
+static svn_error_t *
+datasource_close(void *baton, svn_diff_datasource_e datasource)
+{
+  (void) baton;
+  (void) datasource;
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_diff_fns2_t::datasource_get_next_token.
+ *
+ * Read the next line (token) of the file that DATASOURCE selects in BATON
+ * (a svn_diff__file_baton_t *).
+ *
+ * When a line is available, set *TOKEN to a svn_diff__file_token_t that
+ * records its offset, raw length and normalized length, and set *HASH to
+ * the Adler-32 checksum of its normalized bytes.  When the end of the file
+ * or the start of the identical suffix has been reached, set *TOKEN to NULL
+ * and leave *HASH untouched.
+ *
+ * Line ends are located with svn_eol__find_eol_start(); a CR directly
+ * followed by LF counts as one terminator even when the pair straddles a
+ * chunk boundary.  A line may span several chunks; the file's chunk buffer
+ * is refilled in place as needed.  Tokens are taken from BATON's list of
+ * reusable tokens when possible, otherwise allocated in BATON->pool. */
+static svn_error_t *
+datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
+                          svn_diff_datasource_e datasource)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+  svn_diff__file_token_t *file_token;
+  struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
+  char *endp;
+  char *curp;
+  char *eol;
+  apr_off_t last_chunk;
+  apr_off_t length;
+  apr_uint32_t h = 0;
+  /* Did the last chunk end in a CR character? */
+  svn_boolean_t had_cr = FALSE;
+
+  *token = NULL;
+
+  curp = file->curp;
+  endp = file->endp;
+
+  last_chunk = offset_to_chunk(file->size);
+
+  /* Are we already at the end of a chunk? */
+  if (curp == endp)
+    {
+      /* Are we at EOF */
+      if (last_chunk == file->chunk)
+        return SVN_NO_ERROR; /* EOF */
+
+      /* Or right before an identical suffix in the next chunk? */
+      if (file->chunk + 1 == file->suffix_start_chunk
+          && file->suffix_offset_in_chunk == 0)
+        return SVN_NO_ERROR;
+    }
+
+  /* Stop when we encounter the identical suffix. If suffix scanning was not
+   * performed, suffix_start_chunk will be -1, so this condition will never
+   * be true. */
+  if (file->chunk == file->suffix_start_chunk
+      && (curp - file->buffer) == file->suffix_offset_in_chunk)
+    return SVN_NO_ERROR;
+
+  /* Allocate a new token, or fetch one from the "reusable tokens" list. */
+  file_token = file_baton->tokens;
+  if (file_token)
+    {
+      file_baton->tokens = file_token->next;
+    }
+  else
+    {
+      file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
+    }
+
+  file_token->datasource = datasource;
+  file_token->offset = chunk_to_offset(file->chunk)
+                       + (curp - file->buffer);
+  file_token->norm_offset = file_token->offset;
+  file_token->raw_length = 0;
+  file_token->length = 0;
+
+  while (1)
+    {
+      eol = svn_eol__find_eol_start(curp, endp - curp);
+      if (eol)
+        {
+          had_cr = (*eol == '\r');
+          eol++;
+          /* If we have the whole eol sequence in the chunk... */
+          if (!(had_cr && eol == endp))
+            {
+              /* Also skip past the '\n' in an '\r\n' sequence. */
+              if (had_cr && *eol == '\n')
+                eol++;
+              break;
+            }
+        }
+
+      /* No (complete) eol in this chunk; at EOF the token ends here. */
+      if (file->chunk == last_chunk)
+        {
+          eol = endp;
+          break;
+        }
+
+      /* Account for the part of the token in this chunk before the buffer
+         is overwritten with the next chunk. */
+      length = endp - curp;
+      file_token->raw_length += length;
+      {
+        char *c = curp;
+
+        svn_diff__normalize_buffer(&c, &length,
+                                   &file->normalize_state,
+                                   curp, file_baton->options);
+        if (file_token->length == 0)
+          {
+            /* When we are reading the first part of the token, move the
+               normalized offset past leading ignored characters, if any. */
+            file_token->norm_offset += (c - curp);
+          }
+        file_token->length += length;
+        h = svn__adler32(h, c, length);
+      }
+
+      /* Advance to the next chunk, reusing the same buffer. */
+      curp = endp = file->buffer;
+      file->chunk++;
+      length = file->chunk == last_chunk ?
+        offset_in_chunk(file->size) : CHUNK_SIZE;
+      endp += length;
+      file->endp = endp;
+
+      /* Issue #4283: Normally we should have checked for reaching the skipped
+         suffix here, but because we assume that a suffix always starts on a
+         line and token boundary we rely on catching the suffix earlier in this
+         function.
+
+         When changing things here, make sure the whitespace settings are
+         applied, or we might not reach the exact suffix boundary as token
+         boundary. */
+      SVN_ERR(read_chunk(file->file, file->path,
+                         curp, length,
+                         chunk_to_offset(file->chunk),
+                         file_baton->pool));
+
+      /* If the last chunk ended in a CR, we're done. */
+      if (had_cr)
+        {
+          eol = curp;
+          /* Only look for the LF if this chunk actually holds data.  When
+             the file size is an exact multiple of CHUNK_SIZE the final chunk
+             is empty and the buffer still contains the previous chunk, so
+             *curp would be a stale byte and EOL could end up beyond ENDP. */
+          if (length > 0 && *curp == '\n')
+            ++eol;
+          break;
+        }
+    }
+
+  length = eol - curp;
+  file_token->raw_length += length;
+  file->curp = eol;
+
+  /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
+   * with a spurious empty token. Avoid returning it.
+   * Note that we use the unnormalized length; we don't want a line containing
+   * only spaces (and no trailing newline) to appear like a non-existent
+   * line. */
+  if (file_token->raw_length > 0)
+    {
+      char *c = curp;
+      svn_diff__normalize_buffer(&c, &length,
+                                 &file->normalize_state,
+                                 curp, file_baton->options);
+      if (file_token->length == 0)
+        {
+          /* When we are reading the first part of the token, move the
+             normalized offset past leading ignored characters, if any. */
+          file_token->norm_offset += (c - curp);
+        }
+
+      file_token->length += length;
+
+      *hash = svn__adler32(h, c, length);
+      *token = file_token;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+#define COMPARE_CHUNK_SIZE 4096 /* size of each stack buffer token_compare uses to re-read token data */
+
+/* Implements svn_diff_fns2_t::token_compare
+ *
+ * Compare TOKEN1 and TOKEN2 (both svn_diff__file_token_t *) by their
+ * normalized contents and store the result in *COMPARE: -1 or 1 when the
+ * normalized lengths differ, otherwise the memcmp() result of the first
+ * differing piece, or 0 when the tokens are equal.
+ *
+ * A token whose normalized start lies in the chunk currently held in its
+ * file's buffer is compared straight from that buffer.  Any other token is
+ * re-read from disk in pieces of at most COMPARE_CHUNK_SIZE bytes and
+ * normalized again with BATON's options.
+ *
+ * Return SVN_ERR_DIFF_DATASOURCE_MODIFIED if a token's raw bytes run out
+ * before its recorded normalized length has been compared. */
+static svn_error_t *
+token_compare(void *baton, void *token1, void *token2, int *compare)
+{
+ svn_diff__file_baton_t *file_baton = baton;
+ svn_diff__file_token_t *file_token[2];
+ char buffer[2][COMPARE_CHUNK_SIZE];
+ char *bufp[2]; /* next byte to compare for each token */
+ apr_off_t offset[2]; /* file offset of the next piece to read from disk */
+ struct file_info *file[2];
+ apr_off_t length[2]; /* normalized bytes currently available at bufp[] */
+ apr_off_t total_length; /* normalized bytes still to be compared */
+ /* How much is left to read of each token from the file. */
+ apr_off_t raw_length[2];
+ int i;
+ svn_diff__normalize_state_t state[2];
+
+ file_token[0] = token1;
+ file_token[1] = token2;
+ if (file_token[0]->length < file_token[1]->length)
+ {
+ *compare = -1;
+ return SVN_NO_ERROR;
+ }
+
+ if (file_token[0]->length > file_token[1]->length)
+ {
+ *compare = 1;
+ return SVN_NO_ERROR;
+ }
+
+ total_length = file_token[0]->length;
+ if (total_length == 0)
+ {
+ *compare = 0;
+ return SVN_NO_ERROR;
+ }
+
+ for (i = 0; i < 2; ++i)
+ {
+ int idx = datasource_to_index(file_token[i]->datasource);
+
+ file[i] = &file_baton->files[idx];
+ offset[i] = file_token[i]->norm_offset;
+ state[i] = svn_diff__normalize_state_normal;
+
+ if (offset_to_chunk(offset[i]) == file[i]->chunk)
+ {
+ /* If the start of the token is in memory, the entire token is
+ * in memory.
+ */
+ bufp[i] = file[i]->buffer;
+ bufp[i] += offset_in_chunk(offset[i]);
+
+ length[i] = total_length;
+ raw_length[i] = 0; /* nothing has to be read from disk for this token */
+ }
+ else
+ {
+ apr_off_t skipped;
+
+ length[i] = 0; /* forces a read from disk on the first pass of the loop below */
+
+ /* When we skipped the first part of the token via the whitespace
+ normalization we must reduce the raw length of the token */
+ skipped = (file_token[i]->norm_offset - file_token[i]->offset);
+
+ raw_length[i] = file_token[i]->raw_length - skipped;
+ }
+ }
+
+ do
+ {
+ apr_off_t len;
+ for (i = 0; i < 2; i++)
+ {
+ if (length[i] == 0)
+ {
+ /* Error if raw_length is 0, that's an unexpected change
+ * of the file that can happen when ignoring whitespace
+ * and that can lead to an infinite loop. */
+ if (raw_length[i] == 0)
+ return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
+ NULL,
+ _("The file '%s' changed unexpectedly"
+ " during diff"),
+ file[i]->path);
+
+ /* Read a chunk from disk into a buffer */
+ bufp[i] = buffer[i];
+ length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
+ COMPARE_CHUNK_SIZE : raw_length[i];
+
+ SVN_ERR(read_chunk(file[i]->file,
+ file[i]->path,
+ bufp[i], length[i], offset[i],
+ file_baton->pool));
+ offset[i] += length[i];
+ raw_length[i] -= length[i];
+ /* bufp[i] gets reset to buffer[i] before reading each chunk,
+ so, overwriting it isn't a problem */
+ svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
+ bufp[i], file_baton->options);
+
+ /* assert(length[i] == file_token[i]->length); */
+ }
+ }
+
+ len = length[0] > length[1] ? length[1] : length[0]; /* compare only what both sides have available */
+
+ /* Compare two chunks (that could be entire tokens if they both reside
+ * in memory).
+ */
+ *compare = memcmp(bufp[0], bufp[1], (size_t) len);
+ if (*compare != 0)
+ return SVN_NO_ERROR;
+
+ total_length -= len;
+ length[0] -= len;
+ length[1] -= len;
+ bufp[0] += len;
+ bufp[1] += len;
+ }
+ while(total_length > 0);
+
+ *compare = 0;
+ return SVN_NO_ERROR;
+}
+
+
+/* Implements svn_diff_fns2_t::token_discard.
+ *
+ * Return TOKEN (a svn_diff__file_token_t *) to the reusable-token list of
+ * BATON (a svn_diff__file_baton_t *) so that datasource_get_next_token can
+ * hand it out again instead of allocating a new one. */
+static void
+token_discard(void *baton, void *token)
+{
+  svn_diff__file_baton_t *fb = baton;
+  svn_diff__file_token_t *recycled = token;
+  svn_diff__file_token_t *old_head = fb->tokens;
+
+  /* Push the token on the front of the list. */
+  recycled->next = old_head;
+  fb->tokens = recycled;
+}
+
+
+/* Implements svn_diff_fns2_t::token_discard_all.
+ *
+ * Clear the pool of BATON (a svn_diff__file_baton_t *).  All tokens live in
+ * that pool and the files were opened in it, so this releases the tokens'
+ * memory and closes the open files in one step. */
+static void
+token_discard_all(void *baton)
+{
+  apr_pool_t *tokens_pool = ((svn_diff__file_baton_t *) baton)->pool;
+
+  svn_pool_clear(tokens_pool);
+}
+
+
+static const svn_diff_fns2_t svn_diff__file_vtable = /* callback table handed to svn_diff_diff_2(),
+                                                        svn_diff_diff3_2() and svn_diff_diff4_2()
+                                                        by the file diff entry points below */
+{
+ datasources_open, /* open the files, read first chunks, find identical prefix/suffix */
+ datasource_close, /* no-op */
+ datasource_get_next_token, /* read the next line of a file as a token */
+ token_compare, /* order two tokens by normalized length, then contents */
+ token_discard, /* put a token on the reuse list */
+ token_discard_all /* clear the baton pool */
+};
+
+/* Id for the --ignore-eol-style option, which doesn't have a short name.
+ * The value lies outside the range of the single-character ids used by the
+ * other options in diff_options[]. */
+#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
+
+/* Options supported by svn_diff_file_options_parse().
+ * Each entry is { long name, option id, 0, NULL }; the id is what
+ * svn_diff_file_options_parse() switches on.  The table ends with an
+ * all-zero entry. */
+static const apr_getopt_option_t diff_options[] =
+{
+ { "ignore-space-change", 'b', 0, NULL }, /* sets ignore_space to ..._space_change unless already set */
+ { "ignore-all-space", 'w', 0, NULL }, /* sets ignore_space to ..._space_all */
+ { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL }, /* sets ignore_eol_style */
+ { "show-c-function", 'p', 0, NULL }, /* sets show_c_function */
+ /* ### For compatibility; we don't support the argument to -u, because
+ * ### we don't have optional argument support. */
+ { "unified", 'u', 0, NULL }, /* accepted by the parser but changes no option field */
+ { NULL, 0, 0, NULL }
+};
+
+/* Return a new svn_diff_file_options_t allocated in POOL with every field
+ * zeroed. */
+svn_diff_file_options_t *
+svn_diff_file_options_create(apr_pool_t *pool)
+{
+  svn_diff_file_options_t *options = apr_pcalloc(pool, sizeof(*options));
+
+  return options;
+}
+
+/* A baton for use with opt_parsing_error_func().
+ * svn_diff_file_options_parse() installs it as the error argument of the
+ * apr_getopt_t it creates. */
+struct opt_parsing_error_baton_t
+{
+ svn_error_t *err; /* NULL until opt_parsing_error_func() stores an error here */
+ apr_pool_t *pool; /* pool in which the error message text is formatted */
+};
+
+/* Implements apr_getopt_err_fn_t.
+ *
+ * Capture an error message produced by apr_getopt_long(): format FMT and the
+ * remaining arguments in BATON->pool and set BATON->err to a new
+ * SVN_ERR_INVALID_DIFF_OPTION error carrying that text.  BATON is a
+ * (struct opt_parsing_error_baton_t *). */
+static void
+opt_parsing_error_func(void *baton,
+                       const char *fmt, ...)
+{
+  struct opt_parsing_error_baton_t *eb = baton;
+  const char *text;
+  va_list args;
+
+  va_start(args, fmt);
+  text = apr_pvsprintf(eb->pool, fmt, args);
+  va_end(args);
+
+  /* Drop a leading ": " (always present in the known cases). */
+  if (text[0] == ':' && text[1] == ' ')
+    text += 2;
+
+  eb->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, text);
+}
+
+/* Parse ARGS, an array of (const char *) diff option strings, and update
+ * OPTIONS accordingly.  The recognized options are those listed in
+ * diff_options[].  Return an SVN_ERR_INVALID_DIFF_OPTION error for an
+ * unknown option or for an argument that is not an option.  Temporary
+ * allocations are made in POOL. */
+svn_error_t *
+svn_diff_file_options_parse(svn_diff_file_options_t *options,
+                            const apr_array_header_t *args,
+                            apr_pool_t *pool)
+{
+  apr_getopt_t *os;
+  struct opt_parsing_error_baton_t error_baton;
+  /* apr_getopt expects a program name in slot 0; the options follow from
+     index 1 and the vector ends with a NULL entry. */
+  const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
+
+  error_baton.err = NULL;
+  error_baton.pool = pool;
+
+  argv[0] = "";
+  memcpy((void *) (argv + 1), args->elts, sizeof(char*) * args->nelts);
+  argv[args->nelts + 1] = NULL;
+
+  apr_getopt_init(&os, pool, args->nelts + 1, argv);
+
+  /* Route apr_getopt_long()'s own message into ERROR_BATON; it usually
+   * names the offending option, which we could not tell otherwise. */
+  os->errfn = opt_parsing_error_func;
+  os->errarg = &error_baton;
+
+  for (;;)
+    {
+      const char *opt_arg;
+      int opt_id;
+      apr_status_t status = apr_getopt_long(os, diff_options,
+                                            &opt_id, &opt_arg);
+
+      if (APR_STATUS_IS_EOF(status))
+        break;
+
+      if (status)
+        {
+          /* Wrap whatever message apr_getopt_long() produced (its docs
+           * imply there always is one; a missing one is harmless).  The
+           * message belonging to STATUS itself is not used because it
+           * talks about the "command line", which may mislead here. */
+          return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
+                                  error_baton.err,
+                                  _("Error in options to internal diff"));
+        }
+
+      if (opt_id == 'w')
+        {
+          options->ignore_space = svn_diff_file_ignore_space_all;
+        }
+      else if (opt_id == 'b')
+        {
+          /* -w wins over -b, so only set this while nothing is set yet. */
+          if (! options->ignore_space)
+            options->ignore_space = svn_diff_file_ignore_space_change;
+        }
+      else if (opt_id == SVN_DIFF__OPT_IGNORE_EOL_STYLE)
+        {
+          options->ignore_eol_style = TRUE;
+        }
+      else if (opt_id == 'p')
+        {
+          options->show_c_function = TRUE;
+        }
+      /* Any other recognized option (such as -u) changes nothing. */
+    }
+
+  /* Anything left over is not an option. */
+  if (os->ind < os->argc)
+    return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
+                             _("Invalid argument '%s' in diff options"),
+                             os->argv[os->ind]);
+
+  return SVN_NO_ERROR;
+}
+
+/* Compute the differences between the files ORIGINAL and MODIFIED, honoring
+ * OPTIONS, and return the result in *DIFF via svn_diff_diff_2().
+ *
+ * The files are opened and their tokens allocated in a private subpool of
+ * POOL.  That subpool is destroyed before returning on success and on
+ * error alike, so a failed diff does not keep file handles open until POOL
+ * itself is cleaned up. */
+svn_error_t *
+svn_diff_file_diff_2(svn_diff_t **diff,
+                     const char *original,
+                     const char *modified,
+                     const svn_diff_file_options_t *options,
+                     apr_pool_t *pool)
+{
+  svn_diff__file_baton_t baton = { 0 };
+  svn_error_t *err;
+
+  baton.options = options;
+  baton.files[0].path = original;
+  baton.files[1].path = modified;
+  baton.pool = svn_pool_create(pool);
+
+  err = svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool);
+
+  /* Release the subpool whether or not the diff succeeded. */
+  svn_pool_destroy(baton.pool);
+  return err;
+}
+
+/* Compute the three-way differences between the files ORIGINAL, MODIFIED
+ * and LATEST, honoring OPTIONS, and return the result in *DIFF via
+ * svn_diff_diff3_2().
+ *
+ * The files are opened and their tokens allocated in a private subpool of
+ * POOL.  That subpool is destroyed before returning on success and on
+ * error alike, so a failed diff does not keep file handles open until POOL
+ * itself is cleaned up. */
+svn_error_t *
+svn_diff_file_diff3_2(svn_diff_t **diff,
+                      const char *original,
+                      const char *modified,
+                      const char *latest,
+                      const svn_diff_file_options_t *options,
+                      apr_pool_t *pool)
+{
+  svn_diff__file_baton_t baton = { 0 };
+  svn_error_t *err;
+
+  baton.options = options;
+  baton.files[0].path = original;
+  baton.files[1].path = modified;
+  baton.files[2].path = latest;
+  baton.pool = svn_pool_create(pool);
+
+  err = svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool);
+
+  /* Release the subpool whether or not the diff succeeded. */
+  svn_pool_destroy(baton.pool);
+  return err;
+}
+
+/* Compute the four-way differences between the files ORIGINAL, MODIFIED,
+ * LATEST and ANCESTOR, honoring OPTIONS, and return the result in *DIFF via
+ * svn_diff_diff4_2().
+ *
+ * The files are opened and their tokens allocated in a private subpool of
+ * POOL.  That subpool is destroyed before returning on success and on
+ * error alike, so a failed diff does not keep file handles open until POOL
+ * itself is cleaned up. */
+svn_error_t *
+svn_diff_file_diff4_2(svn_diff_t **diff,
+                      const char *original,
+                      const char *modified,
+                      const char *latest,
+                      const char *ancestor,
+                      const svn_diff_file_options_t *options,
+                      apr_pool_t *pool)
+{
+  svn_diff__file_baton_t baton = { 0 };
+  svn_error_t *err;
+
+  baton.options = options;
+  baton.files[0].path = original;
+  baton.files[1].path = modified;
+  baton.files[2].path = latest;
+  baton.files[3].path = ancestor;
+  baton.pool = svn_pool_create(pool);
+
+  err = svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool);
+
+  /* Release the subpool whether or not the diff succeeded. */
+  svn_pool_destroy(baton.pool);
+  return err;
+}
+
+
+/** Display unified context diffs **/
+
+/* Maximum length of the extra context to show when show_c_function is set.
+ * GNU diff uses 40, let's be brave and use 50 instead. */
+#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
+typedef struct svn_diff__file_output_baton_t /* state shared by the unified-diff output callbacks;
+                                                in the two-element arrays index 0 is the original
+                                                file and index 1 the modified file */
+{
+ svn_stream_t *output_stream; /* receives each hunk header followed by the hunk text */
+ const char *header_encoding; /* passed on when writing the hunk header and the
+                                 no-newline-at-end-of-file message */
+
+ /* Cached markers, in header_encoding.  One of them is put in front of every
+ line added to the hunk, chosen by the line's output type. */
+ const char *context_str;
+ const char *delete_str;
+ const char *insert_str;
+
+ const char *path[2]; /* presumably the paths of the two files - not used in the code shown here */
+ apr_file_t *file[2]; /* the files the lines are read from */
+
+ apr_off_t current_line[2]; /* number of lines consumed so far; also advanced at EOF */
+
+ char buffer[2][4096]; /* read buffer per file */
+ apr_size_t length[2]; /* unprocessed bytes left in the buffer */
+ char *curp[2]; /* next unprocessed byte in the buffer */
+
+ apr_off_t hunk_start[2]; /* zero-based first line of the hunk being built */
+ apr_off_t hunk_length[2]; /* number of lines the hunk covers on each side */
+ svn_stringbuf_t *hunk; /* text of the hunk being built; empty when no hunk is pending */
+
+ /* Should we emit C functions in the unified diff header */
+ svn_boolean_t show_c_function;
+ /* Extra strings to skip over if we match.  Used as a glob pattern list:
+ a line matching one of them is not taken as extra context. */
+ apr_array_header_t *extra_skip_match;
+ /* "Context" to append to the @@ line when the show_c_function option
+ * is set. */
+ svn_stringbuf_t *extra_context;
+ /* Extra context for the current hunk.  The final byte always stays '\0'. */
+ char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
+
+ apr_pool_t *pool; /* used for file reads and for writing the hunk header */
+} svn_diff__file_output_baton_t;
+
+typedef enum svn_diff__file_output_unified_type_e /* what output_unified_line() does with the line it reads */
+{
+ svn_diff__file_output_unified_skip, /* consume the line without adding it to the hunk */
+ svn_diff__file_output_unified_context, /* add with the context marker; counts for both files */
+ svn_diff__file_output_unified_delete, /* add with the delete marker; counts for the original only */
+ svn_diff__file_output_unified_insert /* add with the insert marker; counts for the modified only */
+} svn_diff__file_output_unified_type_e;
+
+/* Consume one line from BATON->file[IDX] (0 = original, 1 = modified).
+ *
+ * Unless TYPE is svn_diff__file_output_unified_skip, append the marker that
+ * belongs to TYPE followed by the line to BATON->hunk and update
+ * BATON->hunk_length.  BATON->current_line[IDX] is incremented on every
+ * call, even at end of file.
+ *
+ * When BATON->show_c_function is set and a skipped or context line starts
+ * with a letter, '$' or '_' and matches none of BATON->extra_skip_match,
+ * the line replaces the contents of BATON->extra_context.
+ *
+ * A line may extend over several buffer refills, and a CR LF pair may be
+ * split across two of them.  If a line that is being output ends at end of
+ * file without an eol, the no-newline message is appended to the hunk.
+ *
+ * Return any read error other than end-of-file.
+ */
+static svn_error_t *
+output_unified_line(svn_diff__file_output_baton_t *baton,
+ svn_diff__file_output_unified_type_e type, int idx)
+{
+ char *curp;
+ char *eol;
+ apr_size_t length;
+ svn_error_t *err;
+ svn_boolean_t bytes_processed = FALSE; /* TRUE once a part of this line without its eol was handled */
+ svn_boolean_t had_cr = FALSE; /* TRUE when the data seen so far ends in a CR */
+ /* Are we collecting extra context? */
+ svn_boolean_t collect_extra = FALSE;
+
+ length = baton->length[idx];
+ curp = baton->curp[idx];
+
+ /* Lazily update the current line even if we're at EOF.
+ * This way we fake output of context at EOF
+ */
+ baton->current_line[idx]++;
+
+ if (length == 0 && apr_file_eof(baton->file[idx]))
+ {
+ return SVN_NO_ERROR;
+ }
+
+ do
+ {
+ if (length > 0)
+ {
+ if (!bytes_processed)
+ {
+ switch (type) /* start of a new line: emit its marker and count it */
+ {
+ case svn_diff__file_output_unified_context:
+ svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
+ baton->hunk_length[0]++;
+ baton->hunk_length[1]++;
+ break;
+ case svn_diff__file_output_unified_delete:
+ svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
+ baton->hunk_length[0]++;
+ break;
+ case svn_diff__file_output_unified_insert:
+ svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
+ baton->hunk_length[1]++;
+ break;
+ default:
+ break;
+ }
+
+ if (baton->show_c_function
+ && (type == svn_diff__file_output_unified_skip
+ || type == svn_diff__file_output_unified_context)
+ && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
+ && !svn_cstring_match_glob_list(curp,
+ baton->extra_skip_match))
+ {
+ svn_stringbuf_setempty(baton->extra_context);
+ collect_extra = TRUE;
+ }
+ }
+
+ eol = svn_eol__find_eol_start(curp, length);
+
+ if (eol != NULL)
+ {
+ apr_size_t len;
+
+ had_cr = (*eol == '\r');
+ eol++;
+ len = (apr_size_t)(eol - curp);
+
+ if (! had_cr || len < length) /* the byte after a CR is available, so the eol is complete */
+ {
+ if (had_cr && *eol == '\n')
+ {
+ ++eol;
+ ++len;
+ }
+
+ length -= len;
+
+ if (type != svn_diff__file_output_unified_skip)
+ {
+ svn_stringbuf_appendbytes(baton->hunk, curp, len);
+ }
+ if (collect_extra)
+ {
+ svn_stringbuf_appendbytes(baton->extra_context,
+ curp, len);
+ }
+
+ baton->curp[idx] = eol;
+ baton->length[idx] = length;
+
+ err = SVN_NO_ERROR;
+
+ break;
+ }
+ }
+
+ if (type != svn_diff__file_output_unified_skip) /* no complete eol yet: use up everything buffered */
+ {
+ svn_stringbuf_appendbytes(baton->hunk, curp, length);
+ }
+
+ if (collect_extra)
+ {
+ svn_stringbuf_appendbytes(baton->extra_context, curp, length);
+ }
+
+ bytes_processed = TRUE;
+ }
+
+ curp = baton->buffer[idx]; /* refill the buffer from the file */
+ length = sizeof(baton->buffer[idx]);
+
+ err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
+
+ /* If the last chunk ended with a CR, we look for an LF at the start
+ of this chunk. */
+ if (had_cr)
+ {
+ if (! err && length > 0 && *curp == '\n')
+ {
+ if (type != svn_diff__file_output_unified_skip)
+ {
+ svn_stringbuf_appendbyte(baton->hunk, *curp);
+ }
+ /* We don't append the LF to extra_context, since it would
+ * just be stripped anyway. */
+ ++curp;
+ --length;
+ }
+
+ baton->curp[idx] = curp;
+ baton->length[idx] = length;
+
+ break;
+ }
+ }
+ while (! err);
+
+ if (err && ! APR_STATUS_IS_EOF(err->apr_err))
+ return err;
+
+ if (err && APR_STATUS_IS_EOF(err->apr_err))
+ {
+ svn_error_clear(err);
+ /* Special case if we reach the end of file AND the last line is in the
+ changed range AND the file doesn't end with a newline */
+ if (bytes_processed && (type != svn_diff__file_output_unified_skip)
+ && ! had_cr)
+ {
+ SVN_ERR(svn_diff__unified_append_no_newline_msg(
+ baton->hunk, baton->header_encoding, baton->pool));
+ }
+
+ baton->length[idx] = 0;
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Consume lines from file SOURCE (0 = original, 1 = modified) of
+ * OUTPUT_BATON, handling each one according to TYPE, until
+ * OUTPUT_BATON->current_line[SOURCE] is no longer less than UNTIL.  Does
+ * nothing when that line has already been reached. */
+static APR_INLINE svn_error_t *
+output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
+                          int source,
+                          svn_diff__file_output_unified_type_e type,
+                          apr_off_t until)
+{
+  for (;;)
+    {
+      if (output_baton->current_line[source] >= until)
+        break;
+
+      SVN_ERR(output_unified_line(output_baton, type, source));
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Write the hunk pending in BATON, if any, to BATON->output_stream: first
+ * complete it with trailing context lines, then emit the "@@" hunk header
+ * and the hunk text, and finally reset the hunk state for the next hunk. */
+static svn_error_t *
+output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
+{
+  apr_off_t context_end;
+  apr_off_t old_start;
+  apr_off_t new_start;
+  apr_size_t nbytes;
+
+  /* No pending hunk, nothing to write. */
+  if (svn_stringbuf_isempty(baton->hunk))
+    return SVN_NO_ERROR;
+
+  /* Trailing context: up to SVN_DIFF__UNIFIED_CONTEXT_SIZE more lines of
+     the original file.  The end line is fixed before any of them is added,
+     because adding them grows hunk_length[0]. */
+  context_end = baton->hunk_start[0] + baton->hunk_length[0]
+                + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+  SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
+                                    svn_diff__file_output_unified_context,
+                                    context_end));
+
+  /* Line indexes are kept zero based; a non-empty side is reported one
+     based, an empty side keeps its zero-based value. */
+  old_start = baton->hunk_start[0];
+  if (baton->hunk_length[0])
+    old_start++;
+
+  new_start = baton->hunk_start[1];
+  if (baton->hunk_length[1])
+    new_start++;
+
+  /* Header first ... */
+  SVN_ERR(svn_diff__unified_write_hunk_header(
+            baton->output_stream, baton->header_encoding, "@@",
+            old_start, baton->hunk_length[0],
+            new_start, baton->hunk_length[1],
+            baton->hunk_extra_context,
+            baton->pool));
+
+  /* ... then the collected lines. */
+  nbytes = baton->hunk->len;
+  SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
+                           &nbytes));
+
+  /* Start over with an empty hunk. */
+  baton->hunk_start[0] = 0;
+  baton->hunk_start[1] = 0;
+  baton->hunk_length[0] = 0;
+  baton->hunk_length[1] = 0;
+  svn_stringbuf_setempty(baton->hunk);
+
+  return SVN_NO_ERROR;
+}
+
+static svn_error_t *
+output_unified_diff_modified(void *baton,
+ apr_off_t original_start, apr_off_t original_length,
+ apr_off_t modified_start, apr_off_t modified_length,
+ apr_off_t latest_start, apr_off_t latest_length)
+{ /* Output callback for one changed range (second slot of
+     svn_diff__file_output_unified_vtable).  BATON is a
+     (svn_diff__file_output_baton_t *).  ORIGINAL_LENGTH lines starting at
+     ORIGINAL_START were replaced by MODIFIED_LENGTH lines starting at
+     MODIFIED_START.  The change is either added to the hunk being built or,
+     when it lies far enough behind it, that hunk is flushed and a new one
+     started.  LATEST_START and LATEST_LENGTH are not used. */
+ svn_diff__file_output_baton_t *output_baton = baton;
+ apr_off_t context_prefix_length; /* leading context lines: at most SVN_DIFF__UNIFIED_CONTEXT_SIZE,
+                                     fewer near the start of the file */
+ apr_off_t prev_context_end;
+ svn_boolean_t init_hunk = FALSE; /* TRUE when this change starts a new hunk */
+
+ if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
+ context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+ else
+ context_prefix_length = original_start;
+
+ /* Calculate where the previous hunk will end if we would write it now
+ (including the necessary context at the end) */
+ if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
+ {
+ prev_context_end = output_baton->hunk_start[0]
+ + output_baton->hunk_length[0]
+ + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+ }
+ else
+ {
+ prev_context_end = -1; /* no hunk is pending */
+
+ if (output_baton->hunk_start[0] == 0
+ && (original_length > 0 || modified_length > 0))
+ init_hunk = TRUE;
+ }
+
+ /* If the changed range is far enough from the previous range, flush the current
+ hunk. */
+ {
+ apr_off_t new_hunk_start = (original_start - context_prefix_length);
+
+ if (output_baton->current_line[0] < new_hunk_start
+ && prev_context_end <= new_hunk_start)
+ {
+ SVN_ERR(output_unified_flush_hunk(output_baton));
+ init_hunk = TRUE;
+ }
+ else if (output_baton->hunk_length[0] > 0
+ || output_baton->hunk_length[1] > 0)
+ {
+ /* We extend the current hunk */
+
+
+ /* Original: Output the context preceding the changed range */
+ SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
+ svn_diff__file_output_unified_context,
+ original_start));
+ }
+ }
+
+ /* Original: Skip lines until we are at the beginning of the context we want
+ to display */
+ SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
+ svn_diff__file_output_unified_skip,
+ original_start - context_prefix_length));
+
+ /* Note that the above skip stores data for the show_c_function support below */
+
+ if (init_hunk)
+ {
+ SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
+ && output_baton->hunk_length[1] == 0);
+
+ output_baton->hunk_start[0] = original_start - context_prefix_length;
+ output_baton->hunk_start[1] = modified_start - context_prefix_length;
+ }
+
+ if (init_hunk && output_baton->show_c_function)
+ {
+ apr_size_t p;
+ const char *invalid_character;
+
+ /* Save the extra context for later use.
+ * Note that the last byte of the hunk_extra_context array is never
+ * touched after it is zero-initialized, so the array is always
+ * 0-terminated. */
+ strncpy(output_baton->hunk_extra_context,
+ output_baton->extra_context->data,
+ SVN_DIFF__EXTRA_CONTEXT_LENGTH);
+ /* Trim whitespace at the end, most notably to get rid of any
+ * newline characters. */
+ p = strlen(output_baton->hunk_extra_context);
+ while (p > 0
+ && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
+ {
+ output_baton->hunk_extra_context[--p] = '\0';
+ }
+ invalid_character =
+ svn_utf__last_valid(output_baton->hunk_extra_context,
+ SVN_DIFF__EXTRA_CONTEXT_LENGTH); /* presumably points just past the valid
+                                     UTF-8 part of the text - confirm against
+                                     svn_utf__last_valid() */
+ for (p = invalid_character - output_baton->hunk_extra_context;
+ p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
+ {
+ output_baton->hunk_extra_context[p] = '\0'; /* zero everything from that point on */
+ }
+ }
+
+ /* Modified: Skip lines until we are at the start of the changed range */
+ SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
+ svn_diff__file_output_unified_skip,
+ modified_start));
+
+ /* Original: Output the context preceding the changed range */
+ SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
+ svn_diff__file_output_unified_context,
+ original_start));
+
+ /* Both: Output the changed range */
+ SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
+ svn_diff__file_output_unified_delete,
+ original_start + original_length));
+ SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
+ svn_diff__file_output_unified_insert,
+ modified_start + modified_length));
+
+ return SVN_NO_ERROR;
+}
+
+/* Set *HEADER to a new string, allocated in POOL, consisting of PATH, a
+   tab, and PATH's mtime formatted in local time.
+
+   Return an error if PATH cannot be stat'ed, if its mtime cannot be
+   converted to local time, or if the formatted time cannot be converted
+   to UTF-8.  If the time cannot be formatted at all, the timestamp part
+   of *HEADER is empty. */
+static svn_error_t *
+output_unified_default_hdr(const char **header, const char *path,
+                           apr_pool_t *pool)
+{
+  apr_finfo_t file_info;
+  apr_time_exp_t exploded_time;
+  char time_buffer[64];
+  apr_size_t time_len = 0;
+  apr_status_t status;
+  const char *utf8_timestr;
+
+  SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
+
+  /* Without this check a failed conversion would leave EXPLODED_TIME
+     uninitialized and we would format garbage below. */
+  status = apr_time_exp_lt(&exploded_time, file_info.mtime);
+  if (status != APR_SUCCESS)
+    return svn_error_wrap_apr(status,
+                              _("Can't convert modification time of '%s'"),
+                              svn_dirent_local_style(path, pool));
+
+  status = apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
+  /* Order of date components can be different in different languages */
+                        _("%a %b %e %H:%M:%S %Y"), &exploded_time);
+
+  /* If formatting failed the buffer contents are indeterminate; never hand
+     an unterminated buffer to the UTF-8 conversion. */
+  if (status != APR_SUCCESS || time_len >= sizeof(time_buffer))
+    time_len = 0;
+  time_buffer[time_len] = '\0';
+
+  SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
+
+  *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
+
+  return SVN_NO_ERROR;
+}
+
+/* Output callbacks used by svn_diff_file_output_unified3(): only the
+ * output_diff_modified slot is filled in, every other slot is NULL. */
+static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
+{
+ NULL, /* output_common */
+ output_unified_diff_modified,
+ NULL, /* output_diff_latest */
+ NULL, /* output_diff_common */
+ NULL /* output_conflict */
+};
+
+/* Write DIFF, the differences between the files ORIGINAL_PATH and
+   MODIFIED_PATH, to OUTPUT_STREAM in unified format.  Nothing at all is
+   written when DIFF contains no differences.
+
+   ORIGINAL_HEADER and MODIFIED_HEADER are the header lines to show; when
+   either is NULL a default header (path, tab, mtime) is generated.  If
+   RELATIVE_TO_DIR is non-NULL, the path shown in a generated header is
+   made relative to it, and SVN_ERR_BAD_RELATIVE_PATH is returned when the
+   path is not inside that directory.  HEADER_ENCODING is the encoding of
+   the headers and of the line-prefix markers.  If SHOW_C_FUNCTION is
+   true, extra context is collected for the hunk headers.  All
+   allocations are made in POOL. */
+svn_error_t *
+svn_diff_file_output_unified3(svn_stream_t *output_stream,
+                              svn_diff_t *diff,
+                              const char *original_path,
+                              const char *modified_path,
+                              const char *original_header,
+                              const char *modified_header,
+                              const char *header_encoding,
+                              const char *relative_to_dir,
+                              svn_boolean_t show_c_function,
+                              apr_pool_t *pool)
+{
+  if (svn_diff_contains_diffs(diff))
+    {
+      svn_diff__file_output_baton_t baton;
+      int i;
+
+      memset(&baton, 0, sizeof(baton));
+      baton.output_stream = output_stream;
+      baton.pool = pool;
+      baton.header_encoding = header_encoding;
+      /* The baton keeps the unmodified paths: these are the ones that get
+         opened below, regardless of RELATIVE_TO_DIR. */
+      baton.path[0] = original_path;
+      baton.path[1] = modified_path;
+      baton.hunk = svn_stringbuf_create_empty(pool);
+      baton.show_c_function = show_c_function;
+      baton.extra_context = svn_stringbuf_create_empty(pool);
+
+      if (show_c_function)
+        {
+          /* The elements pushed below are 'const char *', so size the
+             array elements accordingly. */
+          baton.extra_skip_match = apr_array_make(pool, 3,
+                                                  sizeof(const char *));
+
+          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
+          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
+          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
+        }
+
+      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
+                                            header_encoding, pool));
+      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
+                                            header_encoding, pool));
+      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
+                                            header_encoding, pool));
+
+      if (relative_to_dir)
+        {
+          /* Possibly adjust the "original" and "modified" paths shown in
+             the output (see issue #2723). */
+          const char *child_path;
+
+          if (! original_header)
+            {
+              child_path = svn_dirent_is_child(relative_to_dir,
+                                               original_path, pool);
+              if (child_path)
+                original_path = child_path;
+              else
+                return svn_error_createf(
+                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
+                                   _("Path '%s' must be inside "
+                                     "the directory '%s'"),
+                                   svn_dirent_local_style(original_path, pool),
+                                   svn_dirent_local_style(relative_to_dir,
+                                                          pool));
+            }
+
+          if (! modified_header)
+            {
+              child_path = svn_dirent_is_child(relative_to_dir,
+                                               modified_path, pool);
+              if (child_path)
+                modified_path = child_path;
+              else
+                return svn_error_createf(
+                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
+                                   _("Path '%s' must be inside "
+                                     "the directory '%s'"),
+                                   svn_dirent_local_style(modified_path, pool),
+                                   svn_dirent_local_style(relative_to_dir,
+                                                          pool));
+            }
+        }
+
+      for (i = 0; i < 2; i++)
+        {
+          SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
+                                   APR_READ, APR_OS_DEFAULT, pool));
+        }
+
+      /* NOTE(review): when RELATIVE_TO_DIR was given, the (possibly
+         shortened) path passed here is also the one that
+         output_unified_default_hdr() stats for the mtime -- confirm that
+         callers relying on default headers run with a matching working
+         directory. */
+      if (original_header == NULL)
+        {
+          SVN_ERR(output_unified_default_hdr(&original_header, original_path,
+                                             pool));
+        }
+
+      if (modified_header == NULL)
+        {
+          SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
+                                             pool));
+        }
+
+      SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
+                                             original_header, modified_header,
+                                             pool));
+
+      SVN_ERR(svn_diff_output(diff, &baton,
+                              &svn_diff__file_output_unified_vtable));
+      /* The last hunk is still buffered in the baton; write it out. */
+      SVN_ERR(output_unified_flush_hunk(&baton));
+
+      for (i = 0; i < 2; i++)
+        {
+          SVN_ERR(svn_io_file_close(baton.file[i], pool));
+        }
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
+/** Display diff3 **/
+
+/* A stream to remember *leading* context. Note that this stream does
+ *not* copy the data that it is remembering; it just saves
+ *pointers! */
+typedef struct context_saver_t {
+ /* The stream whose write handler records into this structure. */
+ svn_stream_t *stream;
+ /* Ring buffer of the most recent writes: pointer and length of each.
+ A slot that was never written has a NULL data pointer. */
+ const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
+ apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
+ /* Index of the slot the next write will overwrite (i.e. the oldest). */
+ apr_size_t next_slot;
+ /* Total number of writes seen, including those already overwritten. */
+ apr_size_t total_written;
+} context_saver_t;
+
+
+/* Stream write handler for a context saver.  BATON is a context_saver_t.
+   Record the DATA pointer and its length in the next ring-buffer slot,
+   overwriting the oldest entry; the bytes themselves are not copied. */
+static svn_error_t *
+context_saver_stream_write(void *baton,
+                           const char *data,
+                           apr_size_t *len)
+{
+  context_saver_t *saver = baton;
+  const apr_size_t slot = saver->next_slot;
+
+  saver->data[slot] = data;
+  saver->len[slot] = *len;
+
+  saver->total_written++;
+  saver->next_slot = (slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+
+  return SVN_NO_ERROR;
+}
+
+/* Baton for the diff3 (merge) output callbacks. All three-element
+ arrays are indexed 0 == original, 1 == modified, 2 == latest. */
+typedef struct svn_diff3__file_output_baton_t
+{
+ /* Where output_line() and the marker helpers currently write to. */
+ svn_stream_t *output_stream;
+
+ const char *path[3];
+
+ /* Number of lines consumed so far from each file. */
+ apr_off_t current_line[3];
+
+ /* Per file: start of the contents, one past their last byte, and the
+ start of the next line that has not been consumed yet. */
+ char *buffer[3];
+ char *endp[3];
+ char *curp[3];
+
+ /* The following four members are in the encoding used for the output. */
+ const char *conflict_modified;
+ const char *conflict_original;
+ const char *conflict_separator;
+ const char *conflict_latest;
+
+ /* End-of-line sequence written after each conflict marker. */
+ const char *marker_eol;
+
+ svn_diff_conflict_display_style_t conflict_style;
+
+ /* The rest of the fields are for
+ svn_diff_conflict_display_only_conflicts only. Note that for
+ these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
+ (soon after a conflict) a "trailing context stream", never the
+ actual output stream.*/
+ /* The actual output stream. */
+ svn_stream_t *real_output_stream;
+ context_saver_t *context_saver;
+ /* Used to allocate context_saver and trailing context streams, and
+ for some printfs. */
+ apr_pool_t *pool;
+} svn_diff3__file_output_baton_t;
+
+/* Write every line remembered by CS to OUTPUT_STREAM, oldest first.
+   Slots that were never filled are skipped. */
+static svn_error_t *
+flush_context_saver(context_saver_t *cs,
+                    svn_stream_t *output_stream)
+{
+  int offset;
+
+  /* CS->NEXT_SLOT is the oldest entry, so start there and wrap around. */
+  for (offset = 0; offset < SVN_DIFF__UNIFIED_CONTEXT_SIZE; offset++)
+    {
+      apr_size_t slot = (offset + cs->next_slot)
+                        % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+      apr_size_t len;
+
+      if (! cs->data[slot])
+        continue;
+
+      len = cs->len[slot];
+      SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Clear FOB->POOL, create a fresh, empty context saver in it, and make
+   the saver's stream the current output stream of FOB. */
+static void
+make_context_saver(svn_diff3__file_output_baton_t *fob)
+{
+  context_saver_t *saver;
+
+  svn_pool_clear(fob->pool);
+
+  saver = apr_pcalloc(fob->pool, sizeof(*saver));
+  saver->stream = svn_stream_empty(fob->pool);
+  svn_stream_set_baton(saver->stream, saver);
+  svn_stream_set_write(saver->stream, context_saver_stream_write);
+
+  fob->output_stream = saver->stream;
+  fob->context_saver = saver;
+}
+
+
+/* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to
+ BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
+ a context_saver; used for *trailing* context. */
+
+struct trailing_context_printer {
+ /* Number of writes (one per line) still to be passed through. */
+ apr_size_t lines_to_print;
+ /* The output baton whose streams this printer reads and switches. */
+ svn_diff3__file_output_baton_t *fob;
+};
+
+
+
+/* Stream write handler for a trailing context printer.  BATON is a
+   struct trailing_context_printer.  Pass DATA through to the real output
+   stream and, once the last trailing line has been written, switch the
+   output baton back to saving leading context. */
+static svn_error_t *
+trailing_context_printer_write(void *baton,
+                               const char *data,
+                               apr_size_t *len)
+{
+  struct trailing_context_printer *printer = baton;
+
+  SVN_ERR_ASSERT(printer->lines_to_print > 0);
+  SVN_ERR(svn_stream_write(printer->fob->real_output_stream, data, len));
+
+  if (--printer->lines_to_print == 0)
+    make_context_saver(printer->fob);
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Clear BTN->POOL and make BTN->OUTPUT_STREAM a stream that prints the
+   next SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to the real output stream
+   (see trailing_context_printer_write()). */
+static void
+make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
+{
+  struct trailing_context_printer *tcp;
+  svn_stream_t *s;
+
+  /* This also frees the context saver, which was allocated in
+     BTN->POOL by make_context_saver(). */
+  svn_pool_clear(btn->pool);
+
+  tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
+  tcp->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+  tcp->fob = btn;
+  s = svn_stream_empty(btn->pool);
+  svn_stream_set_baton(s, tcp);
+  svn_stream_set_write(s, trailing_context_printer_write);
+  btn->output_stream = s;
+
+  /* Don't leave BTN->CONTEXT_SAVER pointing into the pool we just
+     cleared: output_conflict_with_context() dereferences it to compare
+     its stream with BTN->OUTPUT_STREAM, and another conflict may arrive
+     before all trailing context has been printed.  Install a zeroed
+     placeholder; its NULL stream never equals the output stream, and
+     make_context_saver() replaces it once the trailing context is done. */
+  btn->context_saver = apr_pcalloc(btn->pool, sizeof(*btn->context_saver));
+}
+
+
+
+/* What output_line() does with the line it consumes. */
+typedef enum svn_diff3__file_output_type_e
+{
+ /* Advance past the line without writing it. */
+ svn_diff3__file_output_skip,
+ /* Write the line to the baton's current output stream. */
+ svn_diff3__file_output_normal
+} svn_diff3__file_output_type_e;
+
+
+/* Consume one line of file IDX in BATON.  The line counter is always
+   advanced, even at end of file.  Unless TYPE is
+   svn_diff3__file_output_skip, the line, including its end-of-line
+   sequence, is written to BATON->OUTPUT_STREAM in a single write. */
+static svn_error_t *
+output_line(svn_diff3__file_output_baton_t *baton,
+            svn_diff3__file_output_type_e type, int idx)
+{
+  char *line_start = baton->curp[idx];
+  char *buf_end = baton->endp[idx];
+  char *line_end;
+
+  /* Lazily update the current line even if we're at EOF. */
+  baton->current_line[idx]++;
+
+  if (line_start == buf_end)
+    return SVN_NO_ERROR;
+
+  line_end = svn_eol__find_eol_start(line_start, buf_end - line_start);
+  if (line_end == NULL)
+    {
+      /* Last line, without an end-of-line sequence. */
+      line_end = buf_end;
+    }
+  else
+    {
+      const char eol_char = *line_end;
+
+      /* Step over the EOL character, and over the LF of a CRLF pair. */
+      line_end++;
+      if (eol_char == '\r' && line_end != buf_end && *line_end == '\n')
+        line_end++;
+    }
+
+  if (type != svn_diff3__file_output_skip)
+    {
+      apr_size_t len = line_end - line_start;
+
+      /* Note that the trailing context printer assumes that
+         svn_stream_write is called exactly once per line. */
+      SVN_ERR(svn_stream_write(baton->output_stream, line_start, &len));
+    }
+
+  baton->curp[idx] = line_end;
+
+  return SVN_NO_ERROR;
+}
+
+/* Write the end-of-line sequence used for conflict markers to the
+   current output stream of BTN. */
+static svn_error_t *
+output_marker_eol(svn_diff3__file_output_baton_t *btn)
+{
+  svn_stream_t *out = btn->output_stream;
+
+  return svn_stream_puts(out, btn->marker_eol);
+}
+
+/* BATON is a svn_diff3__file_output_baton_t.  Skip lines of file IDX
+   until line TARGET_LINE is reached, then output the following
+   TARGET_LENGTH lines. */
+static svn_error_t *
+output_hunk(void *baton, int idx, apr_off_t target_line,
+            apr_off_t target_length)
+{
+  svn_diff3__file_output_baton_t *output_baton = baton;
+  const apr_off_t range_end = target_line + target_length;
+
+  /* Skip lines until we are at the start of the changed range */
+  while (output_baton->current_line[idx] < target_line)
+    SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
+
+  /* Output the range itself */
+  while (output_baton->current_line[idx] < range_end)
+    SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
+
+  return SVN_NO_ERROR;
+}
+
+/* Output callback for ranges common to all files: the lines are taken
+   from the modified file. */
+static svn_error_t *
+output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
+              apr_off_t modified_start, apr_off_t modified_length,
+              apr_off_t latest_start, apr_off_t latest_length)
+{
+  const int modified_idx = 1;
+
+  return output_hunk(baton, modified_idx, modified_start, modified_length);
+}
+
+/* Output callback that emits the given range of the modified file.  Also
+   installed as the output_diff_common callback of the diff3 vtable. */
+static svn_error_t *
+output_diff_modified(void *baton,
+                     apr_off_t original_start, apr_off_t original_length,
+                     apr_off_t modified_start, apr_off_t modified_length,
+                     apr_off_t latest_start, apr_off_t latest_length)
+{
+  const int modified_idx = 1;
+
+  return output_hunk(baton, modified_idx, modified_start, modified_length);
+}
+
+/* Output callback that emits the given range of the latest file. */
+static svn_error_t *
+output_diff_latest(void *baton,
+                   apr_off_t original_start, apr_off_t original_length,
+                   apr_off_t modified_start, apr_off_t modified_length,
+                   apr_off_t latest_start, apr_off_t latest_length)
+{
+  const int latest_idx = 2;
+
+  return output_hunk(baton, latest_idx, latest_start, latest_length);
+}
+
+/* Forward declaration: the vtable below refers to output_conflict(),
+ while the definition of output_conflict() refers to the vtable. */
+static svn_error_t *
+output_conflict(void *baton,
+ apr_off_t original_start, apr_off_t original_length,
+ apr_off_t modified_start, apr_off_t modified_length,
+ apr_off_t latest_start, apr_off_t latest_length,
+ svn_diff_t *diff);
+
+/* Output callbacks used for merged (diff3) output. Ranges changed
+ identically in modified and latest are output from the modified file. */
+static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
+{
+ output_common,
+ output_diff_modified,
+ output_diff_latest,
+ output_diff_modified, /* output_diff_common */
+ output_conflict
+};
+
+
+
+/* Output one conflict for the "only conflicts" display style: flush any
+   saved leading context (preceded by an "@@" line when more lines were
+   seen than could be saved), write the modified, original and latest
+   ranges surrounded by markers that carry their line numbers, and then
+   switch BTN to printing trailing context. */
+static svn_error_t *
+output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
+                             apr_off_t original_start,
+                             apr_off_t original_length,
+                             apr_off_t modified_start,
+                             apr_off_t modified_length,
+                             apr_off_t latest_start,
+                             apr_off_t latest_length)
+{
+  /* Marker formats: a one-line range shows just the line number, any
+     other range shows "start,length". */
+  const char *const line_fmt = "%s (%" APR_OFF_T_FMT ")";
+  const char *const range_fmt = "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")";
+  context_saver_t *saver = btn->context_saver;
+
+  /* Are we currently saving starting context (as opposed to printing
+     trailing context)?  If so, flush it. */
+  if (btn->output_stream == saver->stream)
+    {
+      if (saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
+        SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
+      SVN_ERR(flush_context_saver(saver, btn->real_output_stream));
+    }
+
+  /* From here on, print to the real output stream. */
+  btn->output_stream = btn->real_output_stream;
+
+  /* Modified side. */
+  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                            modified_length == 1 ? line_fmt : range_fmt,
+                            btn->conflict_modified,
+                            modified_start + 1, modified_length));
+  SVN_ERR(output_marker_eol(btn));
+  SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
+
+  /* Original side. */
+  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                            original_length == 1 ? line_fmt : range_fmt,
+                            btn->conflict_original,
+                            original_start + 1, original_length));
+  SVN_ERR(output_marker_eol(btn));
+  SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
+
+  /* Separator, latest side, and the closing marker. */
+  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                            "%s%s", btn->conflict_separator, btn->marker_eol));
+  SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
+  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                            latest_length == 1 ? line_fmt : range_fmt,
+                            btn->conflict_latest,
+                            latest_start + 1, latest_length));
+  SVN_ERR(output_marker_eol(btn));
+
+  /* Go into print-trailing-context mode instead. */
+  make_trailing_context_printer(btn);
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Output callback for a conflicting range.  What is written depends on
+   the conflict display style stored in BATON (a
+   svn_diff3__file_output_baton_t); an unknown style is reported as a
+   malfunction.  DIFF, if non-NULL, is used by the "resolved" style to
+   output the range through the diff3 vtable instead of as a conflict. */
+static svn_error_t *
+output_conflict(void *baton,
+                apr_off_t original_start, apr_off_t original_length,
+                apr_off_t modified_start, apr_off_t modified_length,
+                apr_off_t latest_start, apr_off_t latest_length,
+                svn_diff_t *diff)
+{
+  svn_diff3__file_output_baton_t *file_baton = baton;
+  svn_diff_conflict_display_style_t style = file_baton->conflict_style;
+
+  if (style == svn_diff_conflict_display_only_conflicts)
+    return output_conflict_with_context(file_baton,
+                                        original_start, original_length,
+                                        modified_start, modified_length,
+                                        latest_start, latest_length);
+
+  if (style == svn_diff_conflict_display_resolved_modified_latest)
+    {
+      if (diff)
+        return svn_diff_output(diff, baton,
+                               &svn_diff3__file_output_vtable);
+
+      /* Nothing to resolve with: show both sides instead. */
+      style = svn_diff_conflict_display_modified_latest;
+    }
+
+  switch (style)
+    {
+    case svn_diff_conflict_display_modified_latest:
+    case svn_diff_conflict_display_modified_original_latest:
+      SVN_ERR(svn_stream_puts(file_baton->output_stream,
+                              file_baton->conflict_modified));
+      SVN_ERR(output_marker_eol(file_baton));
+
+      SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
+
+      /* The original section is shown by one of the two styles only. */
+      if (style == svn_diff_conflict_display_modified_original_latest)
+        {
+          SVN_ERR(svn_stream_puts(file_baton->output_stream,
+                                  file_baton->conflict_original));
+          SVN_ERR(output_marker_eol(file_baton));
+          SVN_ERR(output_hunk(baton, 0, original_start, original_length));
+        }
+
+      SVN_ERR(svn_stream_puts(file_baton->output_stream,
+                              file_baton->conflict_separator));
+      SVN_ERR(output_marker_eol(file_baton));
+
+      SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
+
+      SVN_ERR(svn_stream_puts(file_baton->output_stream,
+                              file_baton->conflict_latest));
+      SVN_ERR(output_marker_eol(file_baton));
+      break;
+
+    case svn_diff_conflict_display_modified:
+      SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
+      break;
+
+    case svn_diff_conflict_display_latest:
+      SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
+      break;
+
+    default: /* unknown style */
+      SVN_ERR_MALFUNCTION();
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Write the merged result described by DIFF for the files ORIGINAL_PATH,
+ MODIFIED_PATH and LATEST_PATH to OUTPUT_STREAM, displaying conflicts
+ according to STYLE. A NULL CONFLICT_ORIGINAL, CONFLICT_MODIFIED,
+ CONFLICT_LATEST or CONFLICT_SEPARATOR selects the default marker built
+ below. Allocations are made in POOL. */
+svn_error_t *
+svn_diff_file_output_merge2(svn_stream_t *output_stream,
+ svn_diff_t *diff,
+ const char *original_path,
+ const char *modified_path,
+ const char *latest_path,
+ const char *conflict_original,
+ const char *conflict_modified,
+ const char *conflict_latest,
+ const char *conflict_separator,
+ svn_diff_conflict_display_style_t style,
+ apr_pool_t *pool)
+{
+ svn_diff3__file_output_baton_t baton;
+ apr_file_t *file[3];
+ int idx;
+#if APR_HAS_MMAP
+ apr_mmap_t *mm[3] = { 0 };
+#endif /* APR_HAS_MMAP */
+ const char *eol;
+ svn_boolean_t conflicts_only =
+ (style == svn_diff_conflict_display_only_conflicts);
+
+ memset(&baton, 0, sizeof(baton));
+ if (conflicts_only)
+ {
+ /* In this mode nothing is written directly: output first goes to a
+ context saver that lives in a subpool of its own. */
+ baton.pool = svn_pool_create(pool);
+ make_context_saver(&baton);
+ baton.real_output_stream = output_stream;
+ }
+ else
+ baton.output_stream = output_stream;
+ baton.path[0] = original_path;
+ baton.path[1] = modified_path;
+ baton.path[2] = latest_path;
+ SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
+ conflict_modified ? conflict_modified
+ : apr_psprintf(pool, "<<<<<<< %s",
+ modified_path),
+ pool));
+ SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
+ conflict_original ? conflict_original
+ : apr_psprintf(pool, "||||||| %s",
+ original_path),
+ pool));
+ SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
+ conflict_separator ? conflict_separator
+ : "=======", pool));
+ SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
+ conflict_latest ? conflict_latest
+ : apr_psprintf(pool, ">>>>>>> %s",
+ latest_path),
+ pool));
+
+ baton.conflict_style = style;
+
+ for (idx = 0; idx < 3; idx++)
+ {
+ apr_off_t size;
+
+ SVN_ERR(map_or_read_file(&file[idx],
+ MMAP_T_ARG(mm[idx])
+ &baton.buffer[idx], &size,
+ baton.path[idx], pool));
+
+ baton.curp[idx] = baton.buffer[idx];
+ baton.endp[idx] = baton.buffer[idx];
+
+ /* Only do pointer arithmetic if there actually is a buffer. */
+ if (baton.endp[idx])
+ baton.endp[idx] += size;
+ }
+
+ /* Check what eol marker we should use for conflict markers.
+ We use the eol marker of the modified file and fall back on the
+ platform's eol marker if that file doesn't contain any newlines. */
+ eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
+ NULL);
+ if (! eol)
+ eol = APR_EOL_STR;
+ baton.marker_eol = eol;
+
+ SVN_ERR(svn_diff_output(diff, &baton,
+ &svn_diff3__file_output_vtable));
+
+ /* Release the mappings and close the files again. */
+ for (idx = 0; idx < 3; idx++)
+ {
+#if APR_HAS_MMAP
+ if (mm[idx])
+ {
+ apr_status_t rv = apr_mmap_delete(mm[idx]);
+ if (rv != APR_SUCCESS)
+ {
+ return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
+ baton.path[idx]);
+ }
+ }
+#endif /* APR_HAS_MMAP */
+
+ if (file[idx])
+ {
+ SVN_ERR(svn_io_file_close(file[idx], pool));
+ }
+ }
+
+ if (conflicts_only)
+ svn_pool_destroy(baton.pool);
+
+ return SVN_NO_ERROR;
+}
+
diff --git a/subversion/libsvn_diff/diff_memory.c b/subversion/libsvn_diff/diff_memory.c
new file mode 100644
index 000000000000..00f4c7fd4184
--- /dev/null
+++ b/subversion/libsvn_diff/diff_memory.c
@@ -0,0 +1,1161 @@
+/*
+ * diff_memory.c : routines for doing diffs on in-memory data
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#define WANT_MEMFUNC
+#define WANT_STRFUNC
+#include <apr.h>
+#include <apr_want.h>
+#include <apr_tables.h>
+
+#include <assert.h>
+
+#include "svn_diff.h"
+#include "svn_pools.h"
+#include "svn_types.h"
+#include "svn_string.h"
+#include "svn_utf.h"
+#include "diff.h"
+#include "svn_private_config.h"
+#include "private/svn_adler32.h"
+#include "private/svn_diff_private.h"
+
+/* The tokens (lines) of one in-memory data source. */
+typedef struct source_tokens_t
+{
+ /* A token simply is an svn_string_t pointing to
+ the data of the in-memory data source, containing
+ the raw token text, with length stored in the string */
+ apr_array_header_t *tokens;
+
+ /* Next token to be consumed */
+ apr_size_t next_token;
+
+ /* The source, containing the in-memory data to be diffed */
+ const svn_string_t *source;
+
+ /* TRUE if the last token does not end with a newline character
+ (sequence) */
+ svn_boolean_t ends_without_eol;
+} source_tokens_t;
+
+/* Baton for the in-memory diff callbacks in svn_diff__mem_vtable. */
+typedef struct diff_mem_baton_t
+{
+ /* The tokens for each of the sources, indexed by the value that
+ datasource_to_index() returns for the datasource */
+ source_tokens_t sources[4];
+
+ /* Normalization buffer; we only ever compare 2 tokens at the same time */
+ char *normalization_buf[2];
+
+ /* Options for normalized comparison of the data sources */
+ const svn_diff_file_options_t *normalization_options;
+} diff_mem_baton_t;
+
+
+/* Return the index into diff_mem_baton_t::sources that belongs to
+   DATASOURCE, or -1 if DATASOURCE is not one of the known datasources. */
+static int
+datasource_to_index(svn_diff_datasource_e datasource)
+{
+  int idx = -1;
+
+  if (datasource == svn_diff_datasource_original)
+    idx = 0;
+  else if (datasource == svn_diff_datasource_modified)
+    idx = 1;
+  else if (datasource == svn_diff_datasource_latest)
+    idx = 2;
+  else if (datasource == svn_diff_datasource_ancestor)
+    idx = 3;
+
+  return idx;
+}
+
+
+/* Implements svn_diff_fns2_t::datasources_open.
+   The in-memory sources need no opening; no common prefix or suffix
+   lines are reported. */
+static svn_error_t *
+datasources_open(void *baton,
+                 apr_off_t *prefix_lines,
+                 apr_off_t *suffix_lines,
+                 const svn_diff_datasource_e *datasources,
+                 apr_size_t datasources_len)
+{
+  /* Do nothing: everything is already there and initialized to 0 */
+  *suffix_lines = 0;
+  *prefix_lines = 0;
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Implements svn_diff_fns2_t::datasource_close.
+ BATON and DATASOURCE are ignored. */
+static svn_error_t *
+datasource_close(void *baton, svn_diff_datasource_e datasource)
+{
+ /* Do nothing. The compare_token function needs previous datasources
+ * to stay available until all datasources are processed.
+ */
+
+ return SVN_NO_ERROR;
+}
+
+
+/* Implements svn_diff_fns2_t::datasource_get_next_token.
+   Set *TOKEN to the next token of DATASOURCE and *HASH to the checksum
+   of its normalized text, or set *TOKEN to NULL (leaving *HASH
+   untouched) when all tokens have been consumed. */
+static svn_error_t *
+datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
+                          svn_diff_datasource_e datasource)
+{
+  diff_mem_baton_t *mem_baton = baton;
+  source_tokens_t *src = &(mem_baton->sources[datasource_to_index(datasource)]);
+  svn_diff__normalize_state_t state = svn_diff__normalize_state_normal;
+  svn_string_t *tok;
+  apr_off_t len;
+  char *buf;
+
+  /* No tokens left? */
+  if ((apr_size_t)src->tokens->nelts <= src->next_token)
+    {
+      *token = NULL;
+      return SVN_NO_ERROR;
+    }
+
+  tok = APR_ARRAY_IDX(src->tokens, src->next_token, svn_string_t *);
+  *token = tok;
+
+  /* Hash the normalized form, so that tokens which compare equal under
+     the normalization options also hash equal. */
+  buf = mem_baton->normalization_buf[0];
+  len = tok->len;
+  svn_diff__normalize_buffer(&buf, &len, &state, tok->data,
+                             mem_baton->normalization_options);
+  *hash = svn__adler32(0, buf, len);
+
+  src->next_token++;
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_diff_fns2_t::token_compare.
+   Normalize both tokens and order them by normalized length first and
+   by byte content second; *RESULT is 0 when they are equal. */
+static svn_error_t *
+token_compare(void *baton, void *token1, void *token2, int *result)
+{
+  /* Implement the same behaviour as diff_file.c:token_compare(),
+     but be simpler, because we know we'll have all data in memory */
+  diff_mem_baton_t *btn = baton;
+  svn_string_t *first = token1;
+  svn_string_t *second = token2;
+  char *first_buf = btn->normalization_buf[0];
+  char *second_buf = btn->normalization_buf[1];
+  apr_off_t first_len = first->len;
+  apr_off_t second_len = second->len;
+  /* Each token is normalized starting from the "normal" state. */
+  svn_diff__normalize_state_t first_state = svn_diff__normalize_state_normal;
+  svn_diff__normalize_state_t second_state = svn_diff__normalize_state_normal;
+
+  svn_diff__normalize_buffer(&first_buf, &first_len, &first_state,
+                             first->data, btn->normalization_options);
+  svn_diff__normalize_buffer(&second_buf, &second_len, &second_state,
+                             second->data, btn->normalization_options);
+
+  if (first_len < second_len)
+    *result = -1;
+  else if (first_len > second_len)
+    *result = 1;
+  else if (first_len == 0)
+    *result = 0;
+  else
+    *result = memcmp(first_buf, second_buf, (size_t) first_len);
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_diff_fns2_t::token_discard.
+ BATON and TOKEN are ignored. */
+static void
+token_discard(void *baton, void *token)
+{
+ /* No-op, we have no use for discarded tokens... */
+}
+
+
+/* Implements svn_diff_fns2_t::token_discard_all.
+ BATON is ignored. */
+static void
+token_discard_all(void *baton)
+{
+ /* Do nothing.
+ Note that in the file case, this function discards all
+ tokens allocated, but we're geared toward small in-memory diffs.
+ Meaning that there's no special pool to clear.
+ */
+}
+
+
+/* The svn_diff_fns2_t callbacks for diffing in-memory strings; they
+ expect a diff_mem_baton_t as their baton. */
+static const svn_diff_fns2_t svn_diff__mem_vtable =
+{
+ datasources_open,
+ datasource_close,
+ datasource_get_next_token,
+ token_compare,
+ token_discard,
+ token_discard_all
+};
+
+/* Fill SRC with the diff tokens (e.g. lines) of TEXT, allocating the
+   token array and the tokens in POOL.  A token ends after "\r\n", "\r"
+   or "\n"; a final line without end-of-line sequence becomes a token as
+   well and is flagged in SRC->ENDS_WITHOUT_EOL.
+
+   TEXT is assumed to live long enough for the tokens to
+   stay valid during their lifetime: no data is copied,
+   instead, svn_string_t's are allocated pointing straight
+   into TEXT.
+*/
+static void
+fill_source_tokens(source_tokens_t *src,
+                   const svn_string_t *text,
+                   apr_pool_t *pool)
+{
+  const char *curp;
+  const char *endp;
+  const char *startp;
+
+  src->tokens = apr_array_make(pool, 0, sizeof(svn_string_t *));
+  src->next_token = 0;
+  src->source = text;
+
+  for (startp = curp = text->data, endp = curp + text->len;
+       curp != endp; curp++)
+    {
+      /* Treat "\r\n" as a single end-of-line sequence.  Only look at the
+         following byte if it still belongs to TEXT: reading past ENDP
+         is out of bounds, and stepping over ENDP would keep this loop
+         from ever terminating. */
+      if (*curp == '\r' && curp + 1 != endp && *(curp + 1) == '\n')
+        curp++;
+
+      if (*curp == '\r' || *curp == '\n')
+        {
+          APR_ARRAY_PUSH(src->tokens, svn_string_t *) =
+            svn_string_ncreate(startp, curp - startp + 1, pool);
+
+          startp = curp + 1;
+        }
+    }
+
+  /* If there's anything remaining (ie last line doesn't have a newline) */
+  if (startp != endp)
+    {
+      APR_ARRAY_PUSH(src->tokens, svn_string_t *) =
+        svn_string_ncreate(startp, endp - startp, pool);
+      src->ends_without_eol = TRUE;
+    }
+  else
+    src->ends_without_eol = FALSE;
+}
+
+
+/* Allocate both normalization buffers of BTN in POOL, each as large as
+   the longest token found in the first SOURCES token sources of BTN. */
+static void
+alloc_normalization_bufs(diff_mem_baton_t *btn,
+                         int sources,
+                         apr_pool_t *pool)
+{
+  apr_size_t longest = 0;
+  int src;
+
+  for (src = 0; src < sources; src++)
+    {
+      apr_array_header_t *tokens = btn->sources[src].tokens;
+      apr_off_t tok;
+
+      for (tok = 0; tok < tokens->nelts; tok++)
+        {
+          apr_size_t token_len
+            = APR_ARRAY_IDX(tokens, tok, svn_string_t *)->len;
+
+          if (longest < token_len)
+            longest = token_len;
+        }
+    }
+
+  btn->normalization_buf[0] = apr_palloc(pool, longest);
+  btn->normalization_buf[1] = apr_palloc(pool, longest);
+}
+
+/* Set *DIFF to the differences between the strings ORIGINAL and
+   MODIFIED, compared line by line according to OPTIONS.  Allocate in
+   POOL. */
+svn_error_t *
+svn_diff_mem_string_diff(svn_diff_t **diff,
+                         const svn_string_t *original,
+                         const svn_string_t *modified,
+                         const svn_diff_file_options_t *options,
+                         apr_pool_t *pool)
+{
+  diff_mem_baton_t baton;
+
+  baton.normalization_options = options;
+
+  /* Tokenize both sources before sizing the normalization buffers. */
+  fill_source_tokens(&(baton.sources[0]), original, pool);
+  fill_source_tokens(&(baton.sources[1]), modified, pool);
+  alloc_normalization_bufs(&baton, 2, pool);
+
+  return svn_diff_diff_2(diff, &baton, &svn_diff__mem_vtable, pool);
+}
+
+/* Set *DIFF to the three-way differences between the strings ORIGINAL,
+   MODIFIED and LATEST, compared line by line according to OPTIONS.
+   Allocate in POOL. */
+svn_error_t *
+svn_diff_mem_string_diff3(svn_diff_t **diff,
+                          const svn_string_t *original,
+                          const svn_string_t *modified,
+                          const svn_string_t *latest,
+                          const svn_diff_file_options_t *options,
+                          apr_pool_t *pool)
+{
+  diff_mem_baton_t baton;
+
+  baton.normalization_options = options;
+
+  /* Tokenize all sources before sizing the normalization buffers. */
+  fill_source_tokens(&(baton.sources[0]), original, pool);
+  fill_source_tokens(&(baton.sources[1]), modified, pool);
+  fill_source_tokens(&(baton.sources[2]), latest, pool);
+  alloc_normalization_bufs(&baton, 3, pool);
+
+  return svn_diff_diff3_2(diff, &baton, &svn_diff__mem_vtable, pool);
+}
+
+
+/* Set *DIFF to the four-way differences between the strings ORIGINAL,
+   MODIFIED, LATEST and ANCESTOR, compared line by line according to
+   OPTIONS.  Allocate in POOL. */
+svn_error_t *
+svn_diff_mem_string_diff4(svn_diff_t **diff,
+                          const svn_string_t *original,
+                          const svn_string_t *modified,
+                          const svn_string_t *latest,
+                          const svn_string_t *ancestor,
+                          const svn_diff_file_options_t *options,
+                          apr_pool_t *pool)
+{
+  diff_mem_baton_t baton;
+
+  baton.normalization_options = options;
+
+  /* Tokenize all sources before sizing the normalization buffers. */
+  fill_source_tokens(&(baton.sources[0]), original, pool);
+  fill_source_tokens(&(baton.sources[1]), modified, pool);
+  fill_source_tokens(&(baton.sources[2]), latest, pool);
+  fill_source_tokens(&(baton.sources[3]), ancestor, pool);
+  alloc_normalization_bufs(&baton, 4, pool);
+
+  return svn_diff_diff4_2(diff, &baton, &svn_diff__mem_vtable, pool);
+}
+
+
+/* How output_unified_token_range() treats the tokens it processes. The
+ first three values double as indexes into output_baton_t::prefix_str. */
+typedef enum unified_output_e
+{
+ unified_output_context = 0,
+ unified_output_delete,
+ unified_output_insert,
+ /* Advance past the tokens without adding them to the hunk. */
+ unified_output_skip
+} unified_output_e;
+
+/* Baton for generating unified diffs */
+typedef struct unified_output_baton_t
+{
+ /* Where finished hunks are written to */
+ svn_stream_t *output_stream;
+ /* Encoding of the hunk headers and the marker strings */
+ const char *header_encoding;
+ source_tokens_t sources[2]; /* 0 == original; 1 == modified */
+ apr_off_t current_token[2]; /* current token per source */
+
+ /* Cached markers, in header_encoding,
+ indexed using unified_output_e */
+ const char *prefix_str[3];
+
+ svn_stringbuf_t *hunk; /* in-progress hunk data */
+ apr_off_t hunk_length[2]; /* 0 == original; 1 == modified */
+ apr_off_t hunk_start[2]; /* 0 == original; 1 == modified */
+
+ /* The delimiters of the hunk header, '@@' for text hunks and '##' for
+ * property hunks. */
+ const char *hunk_delimiter;
+ /* The string to print after a line that does not end with a newline.
+ * It must start with a '\'. Typically "\ No newline at end of file". */
+ const char *no_newline_string;
+
+ /* Pool for allocation of temporary memory in the callbacks
+ Should be cleared on entry of each iteration of a callback */
+ apr_pool_t *pool;
+} output_baton_t;
+
+
+/* Process the tokens (lines) of token-source index TOKENS in BTN, from
+   the source's current token up to, but not including, token UNTIL
+   (clamped to the number of tokens in the source).
+
+   Unless TYPE is unified_output_skip, each token is appended to the
+   current hunk behind the prefix for TYPE; the hunk lengths are updated
+   according to TYPE.  If the end of a source that lacks a final newline
+   is reached, BTN->NO_NEWLINE_STRING is appended to the hunk.
+*/
+static svn_error_t *
+output_unified_token_range(output_baton_t *btn,
+                           int tokens,
+                           unified_output_e type,
+                           apr_off_t until)
+{
+  source_tokens_t *source = &btn->sources[tokens];
+
+  if (until > source->tokens->nelts)
+    until = source->tokens->nelts;
+
+  /* Already at or past the requested position: nothing to do. */
+  if (until <= btn->current_token[tokens])
+    return SVN_NO_ERROR;
+
+  /* Do the loop with prefix and token */
+  do
+    {
+      svn_string_t *token =
+        APR_ARRAY_IDX(source->tokens, btn->current_token[tokens],
+                      svn_string_t *);
+
+      if (type != unified_output_skip)
+        {
+          svn_stringbuf_appendcstr(btn->hunk, btn->prefix_str[type]);
+          svn_stringbuf_appendbytes(btn->hunk, token->data, token->len);
+        }
+
+      switch (type)
+        {
+        case unified_output_context:
+          btn->hunk_length[0]++;
+          btn->hunk_length[1]++;
+          break;
+
+        case unified_output_delete:
+          btn->hunk_length[0]++;
+          break;
+
+        case unified_output_insert:
+          btn->hunk_length[1]++;
+          break;
+
+        default:
+          /* ### TODO: Add skip processing for -p handling? */
+          break;
+        }
+
+      btn->current_token[tokens]++;
+    }
+  while (btn->current_token[tokens] != until);
+
+  if (btn->current_token[tokens] == source->tokens->nelts
+      && source->ends_without_eol)
+    {
+      const char *out_str;
+
+      SVN_ERR(svn_utf_cstring_from_utf8_ex2(
+                &out_str, btn->no_newline_string,
+                btn->header_encoding, btn->pool));
+      svn_stringbuf_appendcstr(btn->hunk, out_str);
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Write the hunk collected in BATON, if any, to BATON's output_stream:
+   first complete it with trailing context, then write the hunk header
+   and the hunk text, and finally reset BATON for the next hunk.
+   HUNK_DELIMITER is used in the hunk header; if it is NULL the default
+   delimiter "@@" is used. */
+static svn_error_t *
+output_unified_flush_hunk(output_baton_t *baton,
+                          const char *hunk_delimiter)
+{
+  apr_off_t context_end;
+  apr_off_t old_start;
+  apr_off_t new_start;
+  apr_size_t hunk_len;
+  int side;
+
+  /* Nothing collected, nothing to write. */
+  if (svn_stringbuf_isempty(baton->hunk))
+    return SVN_NO_ERROR;
+
+  svn_pool_clear(baton->pool);
+
+  /* Write the trailing context */
+  context_end = baton->hunk_start[0] + baton->hunk_length[0]
+                + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+  SVN_ERR(output_unified_token_range(baton, 0 /*original*/,
+                                     unified_output_context,
+                                     context_end));
+
+  /* Convert the line indexes from zero based to one based, except for a
+     side that contributes no lines to the hunk. */
+  old_start = baton->hunk_start[0] + (baton->hunk_length[0] ? 1 : 0);
+  new_start = baton->hunk_start[1] + (baton->hunk_length[1] ? 1 : 0);
+
+  /* Write the hunk header */
+  SVN_ERR(svn_diff__unified_write_hunk_header(
+            baton->output_stream, baton->header_encoding,
+            hunk_delimiter ? hunk_delimiter : "@@",
+            old_start, baton->hunk_length[0],
+            new_start, baton->hunk_length[1],
+            NULL /* hunk_extra_context */,
+            baton->pool));
+
+  hunk_len = baton->hunk->len;
+  SVN_ERR(svn_stream_write(baton->output_stream,
+                           baton->hunk->data, &hunk_len));
+
+  /* Prepare for the next hunk */
+  for (side = 0; side < 2; side++)
+    {
+      baton->hunk_length[side] = 0;
+      baton->hunk_start[side] = 0;
+    }
+  svn_stringbuf_setempty(baton->hunk);
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_diff_output_fns_t::output_diff_modified.
+   Adds the changed range (ORIGINAL_LENGTH tokens at ORIGINAL_START versus
+   MODIFIED_LENGTH tokens at MODIFIED_START) to the unified diff hunk kept
+   in BATON.  The hunk built so far is flushed first when the new range
+   starts at or beyond the end of its trailing context; otherwise the
+   current hunk is extended.  LATEST_START and LATEST_LENGTH are not used. */
+static svn_error_t *
+output_unified_diff_modified(void *baton,
+ apr_off_t original_start,
+ apr_off_t original_length,
+ apr_off_t modified_start,
+ apr_off_t modified_length,
+ apr_off_t latest_start,
+ apr_off_t latest_length)
+{
+ output_baton_t *output_baton = baton;
+ apr_off_t context_prefix_length;
+ apr_off_t prev_context_end;
+ svn_boolean_t init_hunk = FALSE;
+
+ if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
+ context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+ else
+ context_prefix_length = original_start; /* fewer lines precede the change than the context size */
+
+ /* Calculate where the previous hunk will end if we would write it now
+ (including the necessary context at the end) */
+ if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
+ {
+ prev_context_end = output_baton->hunk_start[0]
+ + output_baton->hunk_length[0]
+ + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+ }
+ else
+ {
+ prev_context_end = -1; /* no hunk is in progress */
+
+ if (output_baton->hunk_start[0] == 0
+ && (original_length > 0 || modified_length > 0))
+ init_hunk = TRUE; /* this change starts a fresh hunk */
+ }
+
+ /* If the changed range is far enough from the previous range, flush the current
+ hunk.  The flush is a no-op when nothing has been buffered yet. */
+ {
+ apr_off_t new_hunk_start = (original_start - context_prefix_length);
+
+ if (output_baton->current_token[0] < new_hunk_start
+ && prev_context_end <= new_hunk_start)
+ {
+ SVN_ERR(output_unified_flush_hunk(output_baton,
+ output_baton->hunk_delimiter));
+ init_hunk = TRUE; /* the flush reset the hunk state, so start anew */
+ }
+ else if (output_baton->hunk_length[0] > 0
+ || output_baton->hunk_length[1] > 0)
+ {
+ /* We extend the current hunk */
+
+ /* Original: Output the context preceding the changed range */
+ SVN_ERR(output_unified_token_range(output_baton, 0 /* original */,
+ unified_output_context,
+ original_start));
+ }
+ }
+
+ /* Original: Skip lines until we are at the beginning of the context we want
+ to display */
+ SVN_ERR(output_unified_token_range(output_baton, 0 /* original */,
+ unified_output_skip,
+ original_start - context_prefix_length));
+
+ if (init_hunk)
+ {
+ SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
+ && output_baton->hunk_length[1] == 0);
+
+ output_baton->hunk_start[0] = original_start - context_prefix_length; /* both starts include the leading context */
+ output_baton->hunk_start[1] = modified_start - context_prefix_length;
+ }
+
+ /* Modified: Skip lines until we are at the start of the changed range */
+ SVN_ERR(output_unified_token_range(output_baton, 1 /* modified */,
+ unified_output_skip,
+ modified_start));
+
+ /* Original: Output the context preceding the changed range */
+ SVN_ERR(output_unified_token_range(output_baton, 0 /* original */,
+ unified_output_context,
+ original_start));
+
+ /* Both: Output the changed range (deleted lines first, then inserted lines) */
+ SVN_ERR(output_unified_token_range(output_baton, 0 /* original */,
+ unified_output_delete,
+ original_start + original_length));
+ SVN_ERR(output_unified_token_range(output_baton, 1 /* modified */,
+ unified_output_insert,
+ modified_start + modified_length));
+
+ return SVN_NO_ERROR;
+}
+
+static const svn_diff_output_fns_t mem_output_unified_vtable = /* callbacks for unified output: only modified ranges are handled */
+{
+ NULL, /* output_common */
+ output_unified_diff_modified, /* output_diff_modified */
+ NULL, /* output_diff_latest */
+ NULL, /* output_diff_common */
+ NULL /* output_conflict */
+};
+
+
+/* Write DIFF, computed between the in-memory strings ORIGINAL and
+   MODIFIED, to OUTPUT_STREAM in unified diff format.
+   Nothing is written when DIFF contains no differences.  When
+   WITH_DIFF_HEADER is true, a header built from ORIGINAL_HEADER and
+   MODIFIED_HEADER is written first.  HUNK_DELIMITER is used for the hunk
+   headers; the "no newline" note uses the property wording only when
+   HUNK_DELIMITER is "##".  Prefixes and notes are converted from UTF-8 to
+   HEADER_ENCODING.  Allocations are made in POOL and in a subpool of it. */
+svn_error_t *
+svn_diff_mem_string_output_unified2(svn_stream_t *output_stream,
+                                    svn_diff_t *diff,
+                                    svn_boolean_t with_diff_header,
+                                    const char *hunk_delimiter,
+                                    const char *original_header,
+                                    const char *modified_header,
+                                    const char *header_encoding,
+                                    const svn_string_t *original,
+                                    const svn_string_t *modified,
+                                    apr_pool_t *pool)
+{
+  output_baton_t baton;
+
+  /* Without differences there is nothing to print, not even a header. */
+  if (!svn_diff_contains_diffs(diff))
+    return SVN_NO_ERROR;
+
+  memset(&baton, 0, sizeof(baton));
+  baton.output_stream = output_stream;
+  baton.pool = svn_pool_create(pool);
+  baton.header_encoding = header_encoding;
+  baton.hunk = svn_stringbuf_create_empty(pool);
+  baton.hunk_delimiter = hunk_delimiter;
+
+  /* Only the "##" delimiter selects the property flavour of the note. */
+  if (hunk_delimiter != NULL && strcmp(hunk_delimiter, "##") == 0)
+    baton.no_newline_string
+      = APR_EOL_STR SVN_DIFF__NO_NEWLINE_AT_END_OF_PROPERTY APR_EOL_STR;
+  else
+    baton.no_newline_string
+      = APR_EOL_STR SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR;
+
+  /* Line prefixes, converted to the output encoding. */
+  SVN_ERR(svn_utf_cstring_from_utf8_ex2(
+            &baton.prefix_str[unified_output_context], " ",
+            header_encoding, pool));
+  SVN_ERR(svn_utf_cstring_from_utf8_ex2(
+            &baton.prefix_str[unified_output_delete], "-",
+            header_encoding, pool));
+  SVN_ERR(svn_utf_cstring_from_utf8_ex2(
+            &baton.prefix_str[unified_output_insert], "+",
+            header_encoding, pool));
+
+  fill_source_tokens(&baton.sources[0], original, pool);
+  fill_source_tokens(&baton.sources[1], modified, pool);
+
+  if (with_diff_header)
+    SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
+                                           original_header, modified_header,
+                                           pool));
+
+  /* Build the hunks, then write out the last one that is still buffered. */
+  SVN_ERR(svn_diff_output(diff, &baton, &mem_output_unified_vtable));
+  SVN_ERR(output_unified_flush_hunk(&baton, hunk_delimiter));
+
+  svn_pool_destroy(baton.pool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Same as svn_diff_mem_string_output_unified2(), always writing the diff
+   header (WITH_DIFF_HEADER is TRUE) and using the default hunk delimiter
+   (HUNK_DELIMITER is NULL). */
+svn_error_t *
+svn_diff_mem_string_output_unified(svn_stream_t *output_stream,
+                                   svn_diff_t *diff,
+                                   const char *original_header,
+                                   const char *modified_header,
+                                   const char *header_encoding,
+                                   const svn_string_t *original,
+                                   const svn_string_t *modified,
+                                   apr_pool_t *pool)
+{
+  return svn_error_trace(
+           svn_diff_mem_string_output_unified2(output_stream, diff,
+                                               TRUE /* with_diff_header */,
+                                               NULL /* hunk_delimiter */,
+                                               original_header,
+                                               modified_header,
+                                               header_encoding,
+                                               original, modified,
+                                               pool));
+}
+
+
+
+/* diff3 merge output */
+
+/* A stream to remember *leading* context. Note that this stream does
+ *not* copy the data that it is remembering; it just saves
+ *pointers!  The last SVN_DIFF__UNIFIED_CONTEXT_SIZE writes are kept
+ in DATA/LEN, which are used as a ring buffer. */
+typedef struct context_saver_t {
+ svn_stream_t *stream; /* the stream whose writes are recorded here */
+ const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE]; /* pointers to the remembered writes */
+ apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE]; /* length of each remembered write */
+ apr_size_t next_slot; /* index in DATA/LEN that the next write will fill */
+ apr_size_t total_written; /* number of writes seen so far */
+} context_saver_t;
+
+
+/* Stream write handler for a context saver (BATON is a context_saver_t).
+   Remembers the DATA pointer and *LEN in the next ring buffer slot,
+   overwriting the oldest entry, and counts the write.  The data itself is
+   not copied and nothing is written anywhere. */
+static svn_error_t *
+context_saver_stream_write(void *baton,
+                           const char *data,
+                           apr_size_t *len)
+{
+  context_saver_t *saver = baton;
+  const apr_size_t slot = saver->next_slot;
+
+  saver->data[slot] = data;
+  saver->len[slot] = *len;
+  saver->total_written++;
+
+  /* Advance the ring position, wrapping around at the context size. */
+  saver->next_slot = (slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+
+  return SVN_NO_ERROR;
+}
+
+
+typedef struct merge_output_baton_t
+{
+ svn_stream_t *output_stream; /* where the output callbacks currently write */
+
+ /* Tokenized source text */
+ source_tokens_t sources[3]; /* 0 = original, 1 = modified, 2 = latest */
+ apr_off_t next_token[3];
+
+ /* Markers for marking conflicted sections */
+ const char *markers[4]; /* 0 = original, 1 = modified,
+ 2 = separator, 3 = latest (end) */
+ const char *marker_eol; /* line ending written after each marker */
+
+ svn_diff_conflict_display_style_t conflict_style; /* how conflicts are rendered */
+
+ /* The rest of the fields are for
+ svn_diff_conflict_display_only_conflicts only. Note that for
+ these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
+ (soon after a conflict) a "trailing context stream", never the
+ actual output stream.*/
+ /* The actual output stream. */
+ svn_stream_t *real_output_stream;
+ context_saver_t *context_saver;
+ /* Used to allocate context_saver and trailing context streams, and
+ for some printfs. */
+ apr_pool_t *pool;
+} merge_output_baton_t;
+
+
+/* Write the lines remembered by CS to OUTPUT_STREAM, oldest first.
+   Ring buffer slots that were never filled are skipped. */
+static svn_error_t *
+flush_context_saver(context_saver_t *cs,
+                    svn_stream_t *output_stream)
+{
+  int offset;
+
+  /* CS->next_slot is the oldest entry once the ring has wrapped around. */
+  for (offset = 0; offset < SVN_DIFF__UNIFIED_CONTEXT_SIZE; offset++)
+    {
+      apr_size_t slot
+        = (offset + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+      apr_size_t nbytes;
+
+      if (cs->data[slot] == NULL)
+        continue;
+
+      nbytes = cs->len[slot];
+      SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &nbytes));
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Put MOB into "save leading context" mode: clear MOB->pool, allocate a
+   fresh, zeroed context saver in it and make the saver's stream the
+   current output stream of MOB. */
+static void
+make_context_saver(merge_output_baton_t *mob)
+{
+  context_saver_t *saver;
+  svn_stream_t *stream;
+
+  /* Everything allocated in the pool before this point is released. */
+  svn_pool_clear(mob->pool);
+
+  saver = apr_pcalloc(mob->pool, sizeof(*saver));
+  stream = svn_stream_empty(mob->pool);
+  svn_stream_set_baton(stream, saver);
+  svn_stream_set_write(stream, context_saver_stream_write);
+  saver->stream = stream;
+
+  mob->output_stream = stream;
+  mob->context_saver = saver;
+}
+
+
+/* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to
+ BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
+ a context_saver; used for *trailing* context. */
+
+struct trailing_context_printer {
+ apr_size_t lines_to_print; /* writes still to be passed through */
+ merge_output_baton_t *mob; /* the merge baton this printer belongs to */
+};
+
+
+/* Stream write handler for a trailing context printer (BATON is a
+   struct trailing_context_printer).  Passes DATA through to the real
+   output stream and counts it as one line.  After the last allowed line
+   the owning merge baton is switched to a new context saver. */
+static svn_error_t *
+trailing_context_printer_write(void *baton,
+                               const char *data,
+                               apr_size_t *len)
+{
+  struct trailing_context_printer *printer = baton;
+  merge_output_baton_t *owner = printer->mob;
+
+  SVN_ERR_ASSERT(printer->lines_to_print > 0);
+  SVN_ERR(svn_stream_write(owner->real_output_stream, data, len));
+
+  /* make_context_saver() clears the pool PRINTER lives in, so PRINTER
+     must not be touched after that call. */
+  if (--printer->lines_to_print == 0)
+    make_context_saver(owner);
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Put BTN into "print trailing context" mode: clear BTN->pool, allocate a
+   trailing context printer in it that will pass through
+   SVN_DIFF__UNIFIED_CONTEXT_SIZE lines, and make its stream the current
+   output stream of BTN. */
+static void
+make_trailing_context_printer(merge_output_baton_t *btn)
+{
+  struct trailing_context_printer *printer;
+  svn_stream_t *stream;
+
+  svn_pool_clear(btn->pool);
+
+  printer = apr_pcalloc(btn->pool, sizeof(*printer));
+  printer->mob = btn;
+  printer->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+
+  stream = svn_stream_empty(btn->pool);
+  svn_stream_set_baton(stream, printer);
+  svn_stream_set_write(stream, trailing_context_printer_write);
+
+  btn->output_stream = stream;
+}
+
+
+/* Write up to LENGTH tokens of source IDX of BTN, starting at token FIRST,
+   to BTN->output_stream.  Writing stops early at the end of the token
+   array.  If LINES_PRINTED_P is not NULL, the number of tokens written is
+   stored in *LINES_PRINTED_P. */
+static svn_error_t *
+output_merge_token_range(apr_size_t *lines_printed_p,
+                         merge_output_baton_t *btn,
+                         int idx, apr_off_t first,
+                         apr_off_t length)
+{
+  apr_array_header_t *tokens = btn->sources[idx].tokens;
+  apr_size_t written = 0;
+
+  while (length > 0 && first < tokens->nelts)
+    {
+      svn_string_t *line = APR_ARRAY_IDX(tokens, first, svn_string_t *);
+      apr_size_t nbytes = line->len;
+
+      /* Exactly one svn_stream_write per line: the trailing context
+         printer counts lines by counting writes.  BTN->output_stream is
+         read anew each time because a write may replace it. */
+      SVN_ERR(svn_stream_write(btn->output_stream, line->data, &nbytes));
+
+      written++;
+      first++;
+      length--;
+    }
+
+  if (lines_printed_p != NULL)
+    *lines_printed_p = written;
+
+  return SVN_NO_ERROR;
+}
+
+/* Write the marker line ending of BTN to BTN's current output stream. */
+static svn_error_t *
+output_marker_eol(merge_output_baton_t *btn)
+{
+  svn_stream_t *out = btn->output_stream;
+  const char *eol = btn->marker_eol;
+
+  return svn_stream_puts(out, eol);
+}
+
+/* Write conflict marker number IDX of BTN (see BTN->markers), followed by
+   the marker line ending, to BTN's current output stream. */
+static svn_error_t *
+output_merge_marker(merge_output_baton_t *btn, int idx)
+{
+  const char *marker = btn->markers[idx];
+
+  SVN_ERR(svn_stream_puts(btn->output_stream, marker));
+
+  return output_marker_eol(btn);
+}
+
+/* Output callback used for common and modified ranges: writes the
+   MODIFIED_LENGTH tokens of the modified source starting at
+   MODIFIED_START.  The original and latest ranges are ignored. */
+static svn_error_t *
+output_common_modified(void *baton,
+                       apr_off_t original_start, apr_off_t original_length,
+                       apr_off_t modified_start, apr_off_t modified_length,
+                       apr_off_t latest_start, apr_off_t latest_length)
+{
+  merge_output_baton_t *btn = baton;
+
+  return output_merge_token_range(NULL, btn, 1 /* modified */,
+                                  modified_start, modified_length);
+}
+
+/* Output callback used for ranges that differ only in the latest source:
+   writes the LATEST_LENGTH tokens of the latest source starting at
+   LATEST_START.  The original and modified ranges are ignored. */
+static svn_error_t *
+output_latest(void *baton,
+              apr_off_t original_start, apr_off_t original_length,
+              apr_off_t modified_start, apr_off_t modified_length,
+              apr_off_t latest_start, apr_off_t latest_length)
+{
+  merge_output_baton_t *btn = baton;
+
+  return output_merge_token_range(NULL, btn, 2 /* latest */,
+                                  latest_start, latest_length);
+}
+
+static svn_error_t *
+output_conflict(void *baton,
+ apr_off_t original_start, apr_off_t original_length,
+ apr_off_t modified_start, apr_off_t modified_length,
+ apr_off_t latest_start, apr_off_t latest_length,
+ svn_diff_t *diff); /* forward declaration: defined after the vtable, which it refers to */
+
+static const svn_diff_output_fns_t merge_output_vtable = /* callbacks for full merge output */
+{
+ output_common_modified, /* common */
+ output_common_modified, /* modified */
+ output_latest, /* latest */
+ output_common_modified, /* output_diff_common */
+ output_conflict /* conflict */
+};
+
+/* Conflict output callback for full merge output (BATON is a
+   merge_output_baton_t).  Renders the conflicting ranges according to the
+   conflict style stored in the baton:
+   - resolved_modified_latest: when DIFF is given, DIFF is output through
+     merge_output_vtable instead; otherwise handled as modified_latest;
+   - modified_latest / modified_original_latest: the modified text, (for
+     the latter) the original text, and the latest text, separated and
+     enclosed by the conflict markers;
+   - modified / latest: only that side's text, without markers.
+   Any other style is reported as a malfunction. */
+static svn_error_t *
+output_conflict(void *baton,
+                apr_off_t original_start, apr_off_t original_length,
+                apr_off_t modified_start, apr_off_t modified_length,
+                apr_off_t latest_start, apr_off_t latest_length,
+                svn_diff_t *diff)
+{
+  merge_output_baton_t *btn = baton;
+  svn_diff_conflict_display_style_t effective = btn->conflict_style;
+
+  if (effective == svn_diff_conflict_display_resolved_modified_latest)
+    {
+      if (diff != NULL)
+        return svn_diff_output(diff, baton, &merge_output_vtable);
+
+      /* No resolved diff is available: show both sides. */
+      effective = svn_diff_conflict_display_modified_latest;
+    }
+
+  switch (effective)
+    {
+    case svn_diff_conflict_display_modified_latest:
+    case svn_diff_conflict_display_modified_original_latest:
+      SVN_ERR(output_merge_marker(btn, 1 /* modified */));
+      SVN_ERR(output_merge_token_range(NULL, btn, 1 /* modified */,
+                                       modified_start, modified_length));
+
+      if (effective == svn_diff_conflict_display_modified_original_latest)
+        {
+          SVN_ERR(output_merge_marker(btn, 0 /* original */));
+          SVN_ERR(output_merge_token_range(NULL, btn, 0 /* original */,
+                                           original_start,
+                                           original_length));
+        }
+
+      SVN_ERR(output_merge_marker(btn, 2 /* separator */));
+      SVN_ERR(output_merge_token_range(NULL, btn, 2 /* latest */,
+                                       latest_start, latest_length));
+      SVN_ERR(output_merge_marker(btn, 3 /* latest (end) */));
+      break;
+
+    case svn_diff_conflict_display_modified:
+      SVN_ERR(output_merge_token_range(NULL, btn, 1 /* modified */,
+                                       modified_start, modified_length));
+      break;
+
+    case svn_diff_conflict_display_latest:
+      SVN_ERR(output_merge_token_range(NULL, btn, 2 /* latest */,
+                                       latest_start, latest_length));
+      break;
+
+    default: /* unknown style */
+      SVN_ERR_MALFUNCTION();
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Conflict output callback for the "only conflicts" display style (BATON
+   is a merge_output_baton_t).  Writes any saved leading context, then the
+   conflict itself: the modified, original and latest texts, each
+   introduced or closed by its marker annotated with the one based start
+   line and, unless the length is 1, the length of the range.  Afterwards
+   the baton is switched to printing trailing context.  DIFF is not used. */
+static svn_error_t *
+output_conflict_with_context(void *baton,
+                             apr_off_t original_start,
+                             apr_off_t original_length,
+                             apr_off_t modified_start,
+                             apr_off_t modified_length,
+                             apr_off_t latest_start,
+                             apr_off_t latest_length,
+                             svn_diff_t *diff)
+{
+  merge_output_baton_t *btn = baton;
+  context_saver_t *saver = btn->context_saver;
+
+  /* If leading context is being saved (rather than trailing context being
+     printed), write it out now.  A "@@" line is written first when more
+     lines were seen than the saver can hold. */
+  if (btn->output_stream == saver->stream)
+    {
+      if (saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
+        SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
+      SVN_ERR(flush_context_saver(saver, btn->real_output_stream));
+    }
+
+  /* From here on the conflict goes straight to the real output stream. */
+  btn->output_stream = btn->real_output_stream;
+
+  /* Modified side. */
+  if (modified_length == 1)
+    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                              "%s (%" APR_OFF_T_FMT ")",
+                              btn->markers[1],
+                              modified_start + 1, modified_length));
+  else
+    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                              "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
+                              btn->markers[1],
+                              modified_start + 1, modified_length));
+  SVN_ERR(output_marker_eol(btn));
+  SVN_ERR(output_merge_token_range(NULL, btn, 1 /* modified */,
+                                   modified_start, modified_length));
+
+  /* Original side. */
+  if (original_length == 1)
+    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                              "%s (%" APR_OFF_T_FMT ")",
+                              btn->markers[0],
+                              original_start + 1, original_length));
+  else
+    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                              "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
+                              btn->markers[0],
+                              original_start + 1, original_length));
+  SVN_ERR(output_marker_eol(btn));
+  SVN_ERR(output_merge_token_range(NULL, btn, 0 /* original */,
+                                   original_start, original_length));
+
+  /* Separator, latest side and the closing marker. */
+  SVN_ERR(output_merge_marker(btn, 2 /* separator */));
+  SVN_ERR(output_merge_token_range(NULL, btn, 2 /* latest */,
+                                   latest_start, latest_length));
+  if (latest_length == 1)
+    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                              "%s (%" APR_OFF_T_FMT ")",
+                              btn->markers[3],
+                              latest_start + 1, latest_length));
+  else
+    SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
+                              "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
+                              btn->markers[3],
+                              latest_start + 1, latest_length));
+  SVN_ERR(output_marker_eol(btn));
+
+  /* Switch to printing trailing context. */
+  make_trailing_context_printer(btn);
+
+  return SVN_NO_ERROR;
+}
+
+
+static const svn_diff_output_fns_t merge_only_conflicts_output_vtable = /* callbacks for the "only conflicts" display style */
+{
+ output_common_modified, /* output_common */
+ output_common_modified, /* output_diff_modified */
+ output_latest, /* output_diff_latest */
+ output_common_modified, /* output_diff_common */
+ output_conflict_with_context /* output_conflict */
+};
+
+
+/* Return the line ending that terminates TOKEN (the first token of the
+   modified file): "\r", "\n" or "\r\n".  Return NULL when TOKEN is empty
+   or does not end in a line ending. */
+static const char *
+detect_eol(svn_string_t *token)
+{
+  const apr_size_t length = token->len;
+  char last;
+
+  if (length == 0)
+    return NULL;
+
+  last = token->data[length - 1];
+  if (last == '\r')
+    return "\r";
+  if (last != '\n')
+    return NULL;
+
+  /* The token ends in LF; it is CRLF when a CR comes right before it. */
+  if (length > 1 && token->data[length - 2] == '\r')
+    return "\r\n";
+
+  return "\n";
+}
+
+/* Write the merge described by DIFF of the in-memory strings ORIGINAL,
+   MODIFIED and LATEST to OUTPUT_STREAM, rendering conflicts according to
+   STYLE.  CONFLICT_ORIGINAL, CONFLICT_MODIFIED, CONFLICT_LATEST and
+   CONFLICT_SEPARATOR are the conflict markers; each one that is NULL is
+   replaced by a built-in default.  Markers are terminated with the line
+   ending of the first line of MODIFIED, or with the platform default when
+   that cannot be determined.  Allocations are made in POOL. */
+svn_error_t *
+svn_diff_mem_string_output_merge2(svn_stream_t *output_stream,
+                                  svn_diff_t *diff,
+                                  const svn_string_t *original,
+                                  const svn_string_t *modified,
+                                  const svn_string_t *latest,
+                                  const char *conflict_original,
+                                  const char *conflict_modified,
+                                  const char *conflict_latest,
+                                  const char *conflict_separator,
+                                  svn_diff_conflict_display_style_t style,
+                                  apr_pool_t *pool)
+{
+  merge_output_baton_t btn;
+  const svn_diff_output_fns_t *vtable;
+  const char *eol = NULL;
+  svn_boolean_t conflicts_only;
+
+  conflicts_only = (style == svn_diff_conflict_display_only_conflicts);
+  if (conflicts_only)
+    vtable = &merge_only_conflicts_output_vtable;
+  else
+    vtable = &merge_output_vtable;
+
+  memset(&btn, 0, sizeof(btn));
+
+  if (!conflicts_only)
+    {
+      btn.output_stream = output_stream;
+    }
+  else
+    {
+      /* Output first goes to a context saver; the real stream is only
+         written to around conflicts. */
+      btn.pool = svn_pool_create(pool);
+      make_context_saver(&btn);
+      btn.real_output_stream = output_stream;
+    }
+
+  fill_source_tokens(&(btn.sources[0]), original, pool);
+  fill_source_tokens(&(btn.sources[1]), modified, pool);
+  fill_source_tokens(&(btn.sources[2]), latest, pool);
+
+  btn.conflict_style = style;
+
+  /* Take the marker line ending from the first line of the modified text;
+     fall back to the platform default. */
+  if (btn.sources[1].tokens->nelts > 0)
+    eol = detect_eol(APR_ARRAY_IDX(btn.sources[1].tokens, 0,
+                                   svn_string_t *));
+  if (eol == NULL)
+    eol = APR_EOL_STR;
+  btn.marker_eol = eol;
+
+  /* Substitute the default for every marker the caller left out. */
+  if (conflict_modified == NULL)
+    conflict_modified = "<<<<<<< (modified)";
+  if (conflict_original == NULL)
+    conflict_original = "||||||| (original)";
+  if (conflict_separator == NULL)
+    conflict_separator = "=======";
+  if (conflict_latest == NULL)
+    conflict_latest = ">>>>>>> (latest)";
+
+  SVN_ERR(svn_utf_cstring_from_utf8(&btn.markers[1], conflict_modified,
+                                    pool));
+  SVN_ERR(svn_utf_cstring_from_utf8(&btn.markers[0], conflict_original,
+                                    pool));
+  SVN_ERR(svn_utf_cstring_from_utf8(&btn.markers[2], conflict_separator,
+                                    pool));
+  SVN_ERR(svn_utf_cstring_from_utf8(&btn.markers[3], conflict_latest,
+                                    pool));
+
+  SVN_ERR(svn_diff_output(diff, &btn, vtable));
+
+  if (conflicts_only)
+    svn_pool_destroy(btn.pool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Same as svn_diff_mem_string_output_merge2(), with the conflict display
+   style derived from two flags: DISPLAY_ORIGINAL_IN_CONFLICT selects
+   modified_original_latest and takes precedence; otherwise
+   DISPLAY_RESOLVED_CONFLICTS selects resolved_modified_latest; with
+   neither flag set the style is modified_latest. */
+svn_error_t *
+svn_diff_mem_string_output_merge(svn_stream_t *output_stream,
+                                 svn_diff_t *diff,
+                                 const svn_string_t *original,
+                                 const svn_string_t *modified,
+                                 const svn_string_t *latest,
+                                 const char *conflict_original,
+                                 const char *conflict_modified,
+                                 const char *conflict_latest,
+                                 const char *conflict_separator,
+                                 svn_boolean_t display_original_in_conflict,
+                                 svn_boolean_t display_resolved_conflicts,
+                                 apr_pool_t *pool)
+{
+  svn_diff_conflict_display_style_t style;
+
+  if (display_original_in_conflict)
+    style = svn_diff_conflict_display_modified_original_latest;
+  else if (display_resolved_conflicts)
+    style = svn_diff_conflict_display_resolved_modified_latest;
+  else
+    style = svn_diff_conflict_display_modified_latest;
+
+  return svn_diff_mem_string_output_merge2(output_stream, diff,
+                                           original, modified, latest,
+                                           conflict_original,
+                                           conflict_modified,
+                                           conflict_latest,
+                                           conflict_separator,
+                                           style, pool);
+}
diff --git a/subversion/libsvn_diff/diff_tree.c b/subversion/libsvn_diff/diff_tree.c
new file mode 100644
index 000000000000..8490179c48e7
--- /dev/null
+++ b/subversion/libsvn_diff/diff_tree.c
@@ -0,0 +1,1705 @@
+/*
+ * diff_tree.c : default diff tree processor
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include <apr.h>
+#include <apr_pools.h>
+#include <apr_general.h>
+
+#include <assert.h>
+
+#include "svn_dirent_uri.h"
+#include "svn_error.h"
+#include "svn_io.h"
+#include "svn_pools.h"
+#include "svn_props.h"
+#include "svn_types.h"
+
+#include "private/svn_diff_tree.h"
+#include "svn_private_config.h"
+
+typedef struct tree_processor_t
+{
+ svn_diff_tree_processor_t tp; /* the processor handed out by svn_diff__tree_processor_create() */
+
+ /* void *future_extension */
+} tree_processor_t;
+
+
+static svn_error_t *
+default_dir_opened(void **new_dir_baton,
+ svn_boolean_t *skip,
+ svn_boolean_t *skip_children,
+ const char *relpath,
+ const svn_diff_source_t *left_source,
+ const svn_diff_source_t *right_source,
+ const svn_diff_source_t *copyfrom_source,
+ void *parent_dir_baton,
+ const svn_diff_tree_processor_t *processor,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ *new_dir_baton = NULL; /* default dir_opened: no directory baton; SKIP and SKIP_CHILDREN are left untouched */
+ return SVN_NO_ERROR;
+}
+
+/* Default dir_added callback: only closes the directory, by calling
+   PROCESSOR->dir_closed with no left source and RIGHT_SOURCE as the right
+   source.  The copyfrom information and the properties are ignored. */
+static svn_error_t *
+default_dir_added(const char *relpath,
+                  const svn_diff_source_t *copyfrom_source,
+                  const svn_diff_source_t *right_source,
+                  /*const*/ apr_hash_t *copyfrom_props,
+                  /*const*/ apr_hash_t *right_props,
+                  void *dir_baton,
+                  const svn_diff_tree_processor_t *processor,
+                  apr_pool_t *scratch_pool)
+{
+  return svn_error_trace(
+           processor->dir_closed(relpath,
+                                 NULL /* left_source */, right_source,
+                                 dir_baton, processor, scratch_pool));
+}
+
+/* Default dir_deleted callback: only closes the directory, by calling
+   PROCESSOR->dir_closed with LEFT_SOURCE as the left source and no right
+   source.  LEFT_PROPS is ignored. */
+static svn_error_t *
+default_dir_deleted(const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    /*const*/ apr_hash_t *left_props,
+                    void *dir_baton,
+                    const svn_diff_tree_processor_t *processor,
+                    apr_pool_t *scratch_pool)
+{
+  return svn_error_trace(
+           processor->dir_closed(relpath,
+                                 left_source, NULL /* right_source */,
+                                 dir_baton, processor, scratch_pool));
+}
+
+/* Default dir_changed callback: only closes the directory, by calling
+   PROCESSOR->dir_closed with both sources.  The properties and
+   PROP_CHANGES are ignored. */
+static svn_error_t *
+default_dir_changed(const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    const svn_diff_source_t *right_source,
+                    /*const*/ apr_hash_t *left_props,
+                    /*const*/ apr_hash_t *right_props,
+                    const apr_array_header_t *prop_changes,
+                    void *dir_baton,
+                    const struct svn_diff_tree_processor_t *processor,
+                    apr_pool_t *scratch_pool)
+{
+  return svn_error_trace(
+           processor->dir_closed(relpath, left_source, right_source,
+                                 dir_baton, processor, scratch_pool));
+}
+
+static svn_error_t *
+default_dir_closed(const char *relpath,
+ const svn_diff_source_t *left_source,
+ const svn_diff_source_t *right_source,
+ void *dir_baton,
+ const svn_diff_tree_processor_t *processor,
+ apr_pool_t *scratch_pool)
+{
+ return SVN_NO_ERROR; /* default dir_closed: nothing to do */
+}
+
+static svn_error_t *
+default_file_opened(void **new_file_baton,
+ svn_boolean_t *skip,
+ const char *relpath,
+ const svn_diff_source_t *left_source,
+ const svn_diff_source_t *right_source,
+ const svn_diff_source_t *copyfrom_source,
+ void *dir_baton,
+ const svn_diff_tree_processor_t *processor,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ *new_file_baton = dir_baton; /* default file_opened: the file shares its directory's baton; SKIP is left untouched */
+ return SVN_NO_ERROR;
+}
+
+/* Default file_added callback: only closes the file, by calling
+   PROCESSOR->file_closed with no left source and RIGHT_SOURCE as the
+   right source.  The copyfrom information, file paths and properties are
+   ignored. */
+static svn_error_t *
+default_file_added(const char *relpath,
+                   const svn_diff_source_t *copyfrom_source,
+                   const svn_diff_source_t *right_source,
+                   const char *copyfrom_file,
+                   const char *right_file,
+                   /*const*/ apr_hash_t *copyfrom_props,
+                   /*const*/ apr_hash_t *right_props,
+                   void *file_baton,
+                   const svn_diff_tree_processor_t *processor,
+                   apr_pool_t *scratch_pool)
+{
+  return svn_error_trace(
+           processor->file_closed(relpath,
+                                  NULL /* left_source */, right_source,
+                                  file_baton, processor, scratch_pool));
+}
+
+/* Default file_deleted callback: only closes the file, by calling
+   PROCESSOR->file_closed with LEFT_SOURCE as the left source and no right
+   source.  LEFT_FILE and LEFT_PROPS are ignored. */
+static svn_error_t *
+default_file_deleted(const char *relpath,
+                     const svn_diff_source_t *left_source,
+                     const char *left_file,
+                     /*const*/ apr_hash_t *left_props,
+                     void *file_baton,
+                     const svn_diff_tree_processor_t *processor,
+                     apr_pool_t *scratch_pool)
+{
+  return svn_error_trace(
+           processor->file_closed(relpath,
+                                  left_source, NULL /* right_source */,
+                                  file_baton, processor, scratch_pool));
+}
+
+/* Default file_changed callback: only closes the file, by calling
+   PROCESSOR->file_closed with both sources.  The file paths, properties,
+   FILE_MODIFIED and PROP_CHANGES are ignored. */
+static svn_error_t *
+default_file_changed(const char *relpath,
+                     const svn_diff_source_t *left_source,
+                     const svn_diff_source_t *right_source,
+                     const char *left_file,
+                     const char *right_file,
+                     /*const*/ apr_hash_t *left_props,
+                     /*const*/ apr_hash_t *right_props,
+                     svn_boolean_t file_modified,
+                     const apr_array_header_t *prop_changes,
+                     void *file_baton,
+                     const svn_diff_tree_processor_t *processor,
+                     apr_pool_t *scratch_pool)
+{
+  return svn_error_trace(
+           processor->file_closed(relpath, left_source, right_source,
+                                  file_baton, processor, scratch_pool));
+}
+
+static svn_error_t *
+default_file_closed(const char *relpath,
+ const svn_diff_source_t *left_source,
+ const svn_diff_source_t *right_source,
+ void *file_baton,
+ const svn_diff_tree_processor_t *processor,
+ apr_pool_t *scratch_pool)
+{
+ return SVN_NO_ERROR; /* default file_closed: nothing to do */
+}
+
+static svn_error_t *
+default_node_absent(const char *relpath,
+ void *dir_baton,
+ const svn_diff_tree_processor_t *processor,
+ apr_pool_t *scratch_pool)
+{
+ return SVN_NO_ERROR; /* default node_absent: nothing to do */
+}
+
+/* Allocate a new tree processor in RESULT_POOL, store BATON in it and
+   install the default_* implementation for every callback.  Callers can
+   then replace the callbacks they want to handle themselves. */
+svn_diff_tree_processor_t *
+svn_diff__tree_processor_create(void *baton,
+                                apr_pool_t *result_pool)
+{
+  tree_processor_t *wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));
+  svn_diff_tree_processor_t *tp = &wrapper->tp;
+
+  tp->baton = baton;
+
+  /* Directory callbacks. */
+  tp->dir_opened = default_dir_opened;
+  tp->dir_added = default_dir_added;
+  tp->dir_deleted = default_dir_deleted;
+  tp->dir_changed = default_dir_changed;
+  tp->dir_closed = default_dir_closed;
+
+  /* File callbacks. */
+  tp->file_opened = default_file_opened;
+  tp->file_added = default_file_added;
+  tp->file_deleted = default_file_deleted;
+  tp->file_changed = default_file_changed;
+  tp->file_closed = default_file_closed;
+
+  tp->node_absent = default_node_absent;
+
+  return tp;
+}
+
+struct reverse_tree_baton_t
+{
+ const svn_diff_tree_processor_t *processor; /* the processor that receives the reversed calls */
+ const char *prefix_relpath; /* prepended to every relpath when not NULL */
+};
+
+/* dir_opened callback of the reversing processor.  Prepends the configured
+   prefix (if any) to RELPATH and forwards the call to the wrapped
+   processor with LEFT_SOURCE and RIGHT_SOURCE exchanged.  COPYFROM_SOURCE
+   is not forwarded; NULL is passed instead. */
+static svn_error_t *
+reverse_dir_opened(void **new_dir_baton,
+                   svn_boolean_t *skip,
+                   svn_boolean_t *skip_children,
+                   const char *relpath,
+                   const svn_diff_source_t *left_source,
+                   const svn_diff_source_t *right_source,
+                   const svn_diff_source_t *copyfrom_source,
+                   void *parent_dir_baton,
+                   const svn_diff_tree_processor_t *processor,
+                   apr_pool_t *result_pool,
+                   apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  return svn_error_trace(
+           wrapped->dir_opened(new_dir_baton, skip, skip_children,
+                               relpath,
+                               right_source, left_source,
+                               NULL /* copyfrom */,
+                               parent_dir_baton,
+                               wrapped,
+                               result_pool, scratch_pool));
+}
+
+/* dir_added callback of the reversing processor.  Reports the addition to
+   the wrapped processor as a deletion of RIGHT_SOURCE with RIGHT_PROPS,
+   after prepending the configured prefix (if any) to RELPATH.  The
+   copyfrom information is dropped. */
+static svn_error_t *
+reverse_dir_added(const char *relpath,
+                  const svn_diff_source_t *copyfrom_source,
+                  const svn_diff_source_t *right_source,
+                  /*const*/ apr_hash_t *copyfrom_props,
+                  /*const*/ apr_hash_t *right_props,
+                  void *dir_baton,
+                  const svn_diff_tree_processor_t *processor,
+                  apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  return svn_error_trace(
+           wrapped->dir_deleted(relpath, right_source, right_props,
+                                dir_baton, wrapped, scratch_pool));
+}
+
+/* dir_deleted callback of the reversing processor.  Reports the deletion
+   to the wrapped processor as an addition of LEFT_SOURCE with LEFT_PROPS
+   and without copyfrom information, after prepending the configured
+   prefix (if any) to RELPATH. */
+static svn_error_t *
+reverse_dir_deleted(const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    /*const*/ apr_hash_t *left_props,
+                    void *dir_baton,
+                    const svn_diff_tree_processor_t *processor,
+                    apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  return svn_error_trace(
+           wrapped->dir_added(relpath,
+                              NULL /* copyfrom source */,
+                              left_source,
+                              NULL /* copyfrom props */,
+                              left_props,
+                              dir_baton, wrapped, scratch_pool));
+}
+
+/* dir_changed callback of the reversing processor.  Forwards the change to
+   the wrapped processor with the left and right sources and properties
+   exchanged, after prepending the configured prefix (if any) to RELPATH.
+   When PROP_CHANGES is given, both property hashes must be non-NULL and
+   the property changes are recomputed with svn_prop_diffs() for the
+   reversed direction; otherwise NULL is forwarded. */
+static svn_error_t *
+reverse_dir_changed(const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    const svn_diff_source_t *right_source,
+                    /*const*/ apr_hash_t *left_props,
+                    /*const*/ apr_hash_t *right_props,
+                    const apr_array_header_t *prop_changes,
+                    void *dir_baton,
+                    const struct svn_diff_tree_processor_t *processor,
+                    apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+  apr_array_header_t *swapped_changes = NULL;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  if (prop_changes != NULL)
+    {
+      SVN_ERR_ASSERT(left_props != NULL && right_props != NULL);
+      SVN_ERR(svn_prop_diffs(&swapped_changes, left_props, right_props,
+                             scratch_pool));
+    }
+
+  return svn_error_trace(
+           wrapped->dir_changed(relpath,
+                                right_source, left_source,
+                                right_props, left_props,
+                                swapped_changes,
+                                dir_baton, wrapped, scratch_pool));
+}
+
+/* dir_closed callback of the reversing processor.  Forwards the call to
+   the wrapped processor with LEFT_SOURCE and RIGHT_SOURCE exchanged, after
+   prepending the configured prefix (if any) to RELPATH. */
+static svn_error_t *
+reverse_dir_closed(const char *relpath,
+                   const svn_diff_source_t *left_source,
+                   const svn_diff_source_t *right_source,
+                   void *dir_baton,
+                   const svn_diff_tree_processor_t *processor,
+                   apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  return svn_error_trace(
+           wrapped->dir_closed(relpath, right_source, left_source,
+                               dir_baton, wrapped, scratch_pool));
+}
+
+/* file_opened callback of the reversing processor.  Prepends the
+   configured prefix (if any) to RELPATH and forwards the call to the
+   wrapped processor with LEFT_SOURCE and RIGHT_SOURCE exchanged.
+   COPYFROM_SOURCE is not forwarded; NULL is passed instead. */
+static svn_error_t *
+reverse_file_opened(void **new_file_baton,
+                    svn_boolean_t *skip,
+                    const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    const svn_diff_source_t *right_source,
+                    const svn_diff_source_t *copyfrom_source,
+                    void *dir_baton,
+                    const svn_diff_tree_processor_t *processor,
+                    apr_pool_t *result_pool,
+                    apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  return svn_error_trace(
+           wrapped->file_opened(new_file_baton, skip,
+                                relpath,
+                                right_source, left_source,
+                                NULL /* copy_from */,
+                                dir_baton,
+                                wrapped,
+                                result_pool, scratch_pool));
+}
+
+/* file_added callback of the reversing processor.  Reports the addition to
+   the wrapped processor as a deletion of RIGHT_SOURCE, with RIGHT_FILE and
+   RIGHT_PROPS, after prepending the configured prefix (if any) to RELPATH.
+   The copyfrom information is dropped. */
+static svn_error_t *
+reverse_file_added(const char *relpath,
+                   const svn_diff_source_t *copyfrom_source,
+                   const svn_diff_source_t *right_source,
+                   const char *copyfrom_file,
+                   const char *right_file,
+                   /*const*/ apr_hash_t *copyfrom_props,
+                   /*const*/ apr_hash_t *right_props,
+                   void *file_baton,
+                   const svn_diff_tree_processor_t *processor,
+                   apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  return svn_error_trace(
+           wrapped->file_deleted(relpath,
+                                 right_source, right_file, right_props,
+                                 file_baton, wrapped, scratch_pool));
+}
+
+/* file_deleted callback of the reversing processor.  Reports the deletion
+   to the wrapped processor as an addition of LEFT_SOURCE, with LEFT_FILE
+   and LEFT_PROPS and without copyfrom information, after prepending the
+   configured prefix (if any) to RELPATH. */
+static svn_error_t *
+reverse_file_deleted(const char *relpath,
+                     const svn_diff_source_t *left_source,
+                     const char *left_file,
+                     /*const*/ apr_hash_t *left_props,
+                     void *file_baton,
+                     const svn_diff_tree_processor_t *processor,
+                     apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  return svn_error_trace(
+           wrapped->file_added(relpath,
+                               NULL /* copyfrom src */,
+                               left_source,
+                               NULL /* copyfrom file */,
+                               left_file,
+                               NULL /* copyfrom props */,
+                               left_props,
+                               file_baton, wrapped, scratch_pool));
+}
+
+/* file_changed callback of the reversing processor.  Forwards the change
+   to the wrapped processor with the left and right sources, files and
+   properties exchanged, after prepending the configured prefix (if any) to
+   RELPATH.  FILE_MODIFIED is passed on unchanged.  When PROP_CHANGES is
+   given, both property hashes must be non-NULL and the property changes
+   are recomputed with svn_prop_diffs() for the reversed direction;
+   otherwise NULL is forwarded. */
+static svn_error_t *
+reverse_file_changed(const char *relpath,
+                     const svn_diff_source_t *left_source,
+                     const svn_diff_source_t *right_source,
+                     const char *left_file,
+                     const char *right_file,
+                     /*const*/ apr_hash_t *left_props,
+                     /*const*/ apr_hash_t *right_props,
+                     svn_boolean_t file_modified,
+                     const apr_array_header_t *prop_changes,
+                     void *file_baton,
+                     const svn_diff_tree_processor_t *processor,
+                     apr_pool_t *scratch_pool)
+{
+  struct reverse_tree_baton_t *reverse_baton = processor->baton;
+  const svn_diff_tree_processor_t *wrapped = reverse_baton->processor;
+  const char *prefix = reverse_baton->prefix_relpath;
+  apr_array_header_t *swapped_changes = NULL;
+
+  if (prefix != NULL)
+    relpath = svn_relpath_join(prefix, relpath, scratch_pool);
+
+  if (prop_changes != NULL)
+    {
+      SVN_ERR_ASSERT(left_props != NULL && right_props != NULL);
+      SVN_ERR(svn_prop_diffs(&swapped_changes, left_props, right_props,
+                             scratch_pool));
+    }
+
+  return svn_error_trace(
+           wrapped->file_changed(relpath,
+                                 right_source, left_source,
+                                 right_file, left_file,
+                                 right_props, left_props,
+                                 file_modified,
+                                 swapped_changes,
+                                 file_baton, wrapped, scratch_pool));
+}
+
+ /* Reversed view of closing a file: forwards to the wrapped processor with
+    LEFT_SOURCE and RIGHT_SOURCE exchanged and the optional prefix applied
+    to RELPATH. */
+ static svn_error_t *
+ reverse_file_closed(const char *relpath,
+                     const svn_diff_source_t *left_source,
+                     const svn_diff_source_t *right_source,
+                     void *file_baton,
+                     const svn_diff_tree_processor_t *processor,
+                     apr_pool_t *scratch_pool)
+ {
+   struct reverse_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   if (baton->prefix_relpath)
+     relpath = svn_relpath_join(baton->prefix_relpath, relpath, scratch_pool);
+
+   return svn_error_trace(wrapped->file_closed(relpath,
+                                               right_source, left_source,
+                                               file_baton,
+                                               wrapped,
+                                               scratch_pool));
+ }
+
+ /* Forwards a node_absent notification to the wrapped processor, applying
+    the optional prefix to RELPATH.  There is nothing to swap here. */
+ static svn_error_t *
+ reverse_node_absent(const char *relpath,
+                     void *dir_baton,
+                     const svn_diff_tree_processor_t *processor,
+                     apr_pool_t *scratch_pool)
+ {
+   struct reverse_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   if (baton->prefix_relpath)
+     relpath = svn_relpath_join(baton->prefix_relpath, relpath, scratch_pool);
+
+   return svn_error_trace(wrapped->node_absent(relpath, dir_baton,
+                                               wrapped, scratch_pool));
+ }
+
+
+ /* Builds a tree processor, allocated in RESULT_POOL, whose callbacks are
+    the reverse_* functions of this file and which forwards to PROCESSOR.
+    If PREFIX_RELPATH is non-NULL a copy of it is stored in the baton; the
+    callbacks join it in front of every relpath they forward. */
+ const svn_diff_tree_processor_t *
+ svn_diff__tree_processor_reverse_create(const svn_diff_tree_processor_t * processor,
+                                         const char *prefix_relpath,
+                                         apr_pool_t *result_pool)
+ {
+   struct reverse_tree_baton_t *baton = apr_pcalloc(result_pool,
+                                                    sizeof(*baton));
+   svn_diff_tree_processor_t *result;
+
+   baton->processor = processor;
+   if (prefix_relpath)
+     baton->prefix_relpath = apr_pstrdup(result_pool, prefix_relpath);
+
+   result = svn_diff__tree_processor_create(baton, result_pool);
+
+   /* Directory callbacks. */
+   result->dir_opened = reverse_dir_opened;
+   result->dir_added = reverse_dir_added;
+   result->dir_deleted = reverse_dir_deleted;
+   result->dir_changed = reverse_dir_changed;
+   result->dir_closed = reverse_dir_closed;
+
+   /* File callbacks. */
+   result->file_opened = reverse_file_opened;
+   result->file_added = reverse_file_added;
+   result->file_deleted = reverse_file_deleted;
+   result->file_changed = reverse_file_changed;
+   result->file_closed = reverse_file_closed;
+
+   result->node_absent = reverse_node_absent;
+
+   return result;
+ }
+
+ /* Processor baton for the filter tree processor. */
+ struct filter_tree_baton_t
+ {
+ /* Processor that the filter callbacks forward to. */
+ const svn_diff_tree_processor_t *processor;
+ /* Ancestor relpath that the filter callbacks strip from each relpath
+    (via svn_relpath_skip_ancestor) before forwarding. */
+ const char *prefix_relpath;
+ };
+
+ /* Opens a directory through the filter.  RELPATH is made relative to the
+    filter prefix; if it is not below the prefix only *SKIP is set (so
+    *SKIP_CHILDREN is left untouched) and the wrapped processor is not
+    called.  Otherwise the call is forwarded with the shortened relpath. */
+ static svn_error_t *
+ filter_dir_opened(void **new_dir_baton,
+                   svn_boolean_t *skip,
+                   svn_boolean_t *skip_children,
+                   const char *relpath,
+                   const svn_diff_source_t *left_source,
+                   const svn_diff_source_t *right_source,
+                   const svn_diff_source_t *copyfrom_source,
+                   void *parent_dir_baton,
+                   const svn_diff_tree_processor_t *processor,
+                   apr_pool_t *result_pool,
+                   apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+
+   if (relpath == NULL)
+     {
+       /* Skip work for this, but NOT for DESCENDANTS */
+       *skip = TRUE;
+       return SVN_NO_ERROR;
+     }
+
+   return svn_error_trace(wrapped->dir_opened(new_dir_baton, skip,
+                                              skip_children,
+                                              relpath,
+                                              left_source, right_source,
+                                              copyfrom_source,
+                                              parent_dir_baton,
+                                              wrapped,
+                                              result_pool, scratch_pool));
+ }
+
+ /* Forwards a directory addition with RELPATH made relative to the filter
+    prefix.  A relpath outside the prefix is treated as a driver error. */
+ static svn_error_t *
+ filter_dir_added(const char *relpath,
+                  const svn_diff_source_t *copyfrom_source,
+                  const svn_diff_source_t *right_source,
+                  /*const*/ apr_hash_t *copyfrom_props,
+                  /*const*/ apr_hash_t *right_props,
+                  void *dir_baton,
+                  const svn_diff_tree_processor_t *processor,
+                  apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->dir_added(relpath,
+                                             copyfrom_source, right_source,
+                                             copyfrom_props, right_props,
+                                             dir_baton,
+                                             wrapped,
+                                             scratch_pool));
+ }
+
+ /* Forwards a directory deletion with RELPATH made relative to the filter
+    prefix.  A relpath outside the prefix is treated as a driver error. */
+ static svn_error_t *
+ filter_dir_deleted(const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    /*const*/ apr_hash_t *left_props,
+                    void *dir_baton,
+                    const svn_diff_tree_processor_t *processor,
+                    apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->dir_deleted(relpath,
+                                               left_source, left_props,
+                                               dir_baton,
+                                               wrapped,
+                                               scratch_pool));
+ }
+
+ /* Forwards a directory property change with RELPATH made relative to the
+    filter prefix.  A relpath outside the prefix is treated as a driver
+    error. */
+ static svn_error_t *
+ filter_dir_changed(const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    const svn_diff_source_t *right_source,
+                    /*const*/ apr_hash_t *left_props,
+                    /*const*/ apr_hash_t *right_props,
+                    const apr_array_header_t *prop_changes,
+                    void *dir_baton,
+                    const struct svn_diff_tree_processor_t *processor,
+                    apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->dir_changed(relpath,
+                                               left_source, right_source,
+                                               left_props, right_props,
+                                               prop_changes,
+                                               dir_baton,
+                                               wrapped,
+                                               scratch_pool));
+ }
+
+ /* Forwards a directory close with RELPATH made relative to the filter
+    prefix.  A relpath outside the prefix is treated as a driver error. */
+ static svn_error_t *
+ filter_dir_closed(const char *relpath,
+                   const svn_diff_source_t *left_source,
+                   const svn_diff_source_t *right_source,
+                   void *dir_baton,
+                   const svn_diff_tree_processor_t *processor,
+                   apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->dir_closed(relpath,
+                                              left_source, right_source,
+                                              dir_baton,
+                                              wrapped,
+                                              scratch_pool));
+ }
+
+ /* Opens a file through the filter.  RELPATH is made relative to the filter
+    prefix; a file that is not below the prefix is skipped (*SKIP set to
+    TRUE) without calling the wrapped processor. */
+ static svn_error_t *
+ filter_file_opened(void **new_file_baton,
+                    svn_boolean_t *skip,
+                    const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    const svn_diff_source_t *right_source,
+                    const svn_diff_source_t *copyfrom_source,
+                    void *dir_baton,
+                    const svn_diff_tree_processor_t *processor,
+                    apr_pool_t *result_pool,
+                    apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+
+   if (relpath == NULL)
+     {
+       *skip = TRUE;
+       return SVN_NO_ERROR;
+     }
+
+   return svn_error_trace(wrapped->file_opened(new_file_baton,
+                                               skip,
+                                               relpath,
+                                               left_source, right_source,
+                                               copyfrom_source,
+                                               dir_baton,
+                                               wrapped,
+                                               result_pool, scratch_pool));
+ }
+
+ /* Forwards a file addition with RELPATH made relative to the filter
+    prefix.  A relpath outside the prefix is treated as a driver error. */
+ static svn_error_t *
+ filter_file_added(const char *relpath,
+                   const svn_diff_source_t *copyfrom_source,
+                   const svn_diff_source_t *right_source,
+                   const char *copyfrom_file,
+                   const char *right_file,
+                   /*const*/ apr_hash_t *copyfrom_props,
+                   /*const*/ apr_hash_t *right_props,
+                   void *file_baton,
+                   const svn_diff_tree_processor_t *processor,
+                   apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->file_added(relpath,
+                                              copyfrom_source, right_source,
+                                              copyfrom_file, right_file,
+                                              copyfrom_props, right_props,
+                                              file_baton,
+                                              wrapped,
+                                              scratch_pool));
+ }
+
+ /* Forwards a file deletion with RELPATH made relative to the filter
+    prefix.  A relpath outside the prefix is treated as a driver error. */
+ static svn_error_t *
+ filter_file_deleted(const char *relpath,
+                     const svn_diff_source_t *left_source,
+                     const char *left_file,
+                     /*const*/ apr_hash_t *left_props,
+                     void *file_baton,
+                     const svn_diff_tree_processor_t *processor,
+                     apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->file_deleted(relpath,
+                                                left_source,
+                                                left_file,
+                                                left_props,
+                                                file_baton,
+                                                wrapped,
+                                                scratch_pool));
+ }
+
+ /* Forwards a file modification with RELPATH made relative to the filter
+    prefix.  A relpath outside the prefix is treated as a driver error. */
+ static svn_error_t *
+ filter_file_changed(const char *relpath,
+                     const svn_diff_source_t *left_source,
+                     const svn_diff_source_t *right_source,
+                     const char *left_file,
+                     const char *right_file,
+                     /*const*/ apr_hash_t *left_props,
+                     /*const*/ apr_hash_t *right_props,
+                     svn_boolean_t file_modified,
+                     const apr_array_header_t *prop_changes,
+                     void *file_baton,
+                     const svn_diff_tree_processor_t *processor,
+                     apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->file_changed(relpath,
+                                                left_source, right_source,
+                                                left_file, right_file,
+                                                left_props, right_props,
+                                                file_modified,
+                                                prop_changes,
+                                                file_baton,
+                                                wrapped,
+                                                scratch_pool));
+ }
+
+ /* Forwards a file close with RELPATH made relative to the filter prefix.
+    A relpath outside the prefix is treated as a driver error. */
+ static svn_error_t *
+ filter_file_closed(const char *relpath,
+                    const svn_diff_source_t *left_source,
+                    const svn_diff_source_t *right_source,
+                    void *file_baton,
+                    const svn_diff_tree_processor_t *processor,
+                    apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->file_closed(relpath,
+                                               left_source, right_source,
+                                               file_baton,
+                                               wrapped,
+                                               scratch_pool));
+ }
+
+ /* Forwards a node_absent notification with RELPATH made relative to the
+    filter prefix.  A relpath outside the prefix is treated as a driver
+    error. */
+ static svn_error_t *
+ filter_node_absent(const char *relpath,
+                    void *dir_baton,
+                    const svn_diff_tree_processor_t *processor,
+                    apr_pool_t *scratch_pool)
+ {
+   struct filter_tree_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   relpath = svn_relpath_skip_ancestor(baton->prefix_relpath, relpath);
+   assert(relpath != NULL); /* Driver error */
+
+   return svn_error_trace(wrapped->node_absent(relpath, dir_baton,
+                                               wrapped, scratch_pool));
+ }
+
+
+ /* Builds a tree processor, allocated in RESULT_POOL, whose callbacks are
+    the filter_* functions of this file and which forwards to PROCESSOR.
+
+    PREFIX_RELPATH is the ancestor that the callbacks strip from every
+    relpath before forwarding.  A NULL PREFIX_RELPATH is stored as the empty
+    relpath "": every filter callback hands the stored prefix straight to
+    svn_relpath_skip_ancestor(), which must not be given NULL, so leaving
+    the field NULL would make the first callback invocation crash.
+    NOTE(review): "" is expected to make svn_relpath_skip_ancestor() return
+    its child argument unchanged, i.e. "no filtering" -- confirm against
+    svn_dirent_uri.h. */
+ const svn_diff_tree_processor_t *
+ svn_diff__tree_processor_filter_create(const svn_diff_tree_processor_t * processor,
+                                        const char *prefix_relpath,
+                                        apr_pool_t *result_pool)
+ {
+   struct filter_tree_baton_t *fb;
+   svn_diff_tree_processor_t *filter;
+
+   fb = apr_pcalloc(result_pool, sizeof(*fb));
+   fb->processor = processor;
+   /* Never leave the prefix NULL; see the function comment. */
+   fb->prefix_relpath = apr_pstrdup(result_pool,
+                                    prefix_relpath ? prefix_relpath : "");
+
+   filter = svn_diff__tree_processor_create(fb, result_pool);
+
+   filter->dir_opened = filter_dir_opened;
+   filter->dir_added = filter_dir_added;
+   filter->dir_deleted = filter_dir_deleted;
+   filter->dir_changed = filter_dir_changed;
+   filter->dir_closed = filter_dir_closed;
+
+   filter->file_opened = filter_file_opened;
+   filter->file_added = filter_file_added;
+   filter->file_deleted = filter_file_deleted;
+   filter->file_changed = filter_file_changed;
+   filter->file_closed = filter_file_closed;
+
+   filter->node_absent = filter_node_absent;
+
+   return filter;
+ }
+
+ /* Processor baton for the copy-as-changed tree processor. */
+ struct copy_as_changed_baton_t
+ {
+ /* Processor that the copy_as_changed_* callbacks forward to. */
+ const svn_diff_tree_processor_t *processor;
+ };
+
+ /* Opens a directory for the copy-as-changed processor.  When there is no
+    LEFT_SOURCE but a COPYFROM_SOURCE is given, the copy source is passed on
+    as the left source and the copyfrom argument is cleared, so the wrapped
+    processor does not see a copyfrom for this directory. */
+ static svn_error_t *
+ copy_as_changed_dir_opened(void **new_dir_baton,
+                            svn_boolean_t *skip,
+                            svn_boolean_t *skip_children,
+                            const char *relpath,
+                            const svn_diff_source_t *left_source,
+                            const svn_diff_source_t *right_source,
+                            const svn_diff_source_t *copyfrom_source,
+                            void *parent_dir_baton,
+                            const svn_diff_tree_processor_t *processor,
+                            apr_pool_t *result_pool,
+                            apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   if (copyfrom_source && !left_source)
+     {
+       assert(right_source != NULL);
+
+       left_source = copyfrom_source;
+       copyfrom_source = NULL;
+     }
+
+   return svn_error_trace(wrapped->dir_opened(new_dir_baton, skip,
+                                              skip_children,
+                                              relpath,
+                                              left_source, right_source,
+                                              copyfrom_source,
+                                              parent_dir_baton,
+                                              wrapped,
+                                              result_pool, scratch_pool));
+ }
+
+ /* Handles a directory addition for the copy-as-changed processor.  An
+    addition without COPYFROM_SOURCE is forwarded unchanged as dir_added.
+    An addition with a copy source is instead reported as dir_changed, with
+    the copy source as left side and a property diff computed between
+    COPYFROM_PROPS and RIGHT_PROPS. */
+ static svn_error_t *
+ copy_as_changed_dir_added(const char *relpath,
+                           const svn_diff_source_t *copyfrom_source,
+                           const svn_diff_source_t *right_source,
+                           /*const*/ apr_hash_t *copyfrom_props,
+                           /*const*/ apr_hash_t *right_props,
+                           void *dir_baton,
+                           const svn_diff_tree_processor_t *processor,
+                           apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+   apr_array_header_t *prop_diff;
+
+   if (!copyfrom_source)
+     {
+       /* Plain addition: nothing to translate. */
+       return svn_error_trace(wrapped->dir_added(relpath,
+                                                 copyfrom_source,
+                                                 right_source,
+                                                 copyfrom_props,
+                                                 right_props,
+                                                 dir_baton,
+                                                 wrapped,
+                                                 scratch_pool));
+     }
+
+   SVN_ERR(svn_prop_diffs(&prop_diff, right_props, copyfrom_props,
+                          scratch_pool));
+
+   return svn_error_trace(wrapped->dir_changed(relpath,
+                                               copyfrom_source,
+                                               right_source,
+                                               copyfrom_props,
+                                               right_props,
+                                               prop_diff,
+                                               dir_baton,
+                                               wrapped,
+                                               scratch_pool));
+ }
+
+ /* Pass-through: forwards a directory deletion to the wrapped processor
+    with all arguments unchanged. */
+ static svn_error_t *
+ copy_as_changed_dir_deleted(const char *relpath,
+                             const svn_diff_source_t *left_source,
+                             /*const*/ apr_hash_t *left_props,
+                             void *dir_baton,
+                             const svn_diff_tree_processor_t *processor,
+                             apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   return svn_error_trace(wrapped->dir_deleted(relpath,
+                                               left_source, left_props,
+                                               dir_baton,
+                                               wrapped,
+                                               scratch_pool));
+ }
+
+ /* Pass-through: forwards a directory property change to the wrapped
+    processor with all arguments unchanged. */
+ static svn_error_t *
+ copy_as_changed_dir_changed(const char *relpath,
+                             const svn_diff_source_t *left_source,
+                             const svn_diff_source_t *right_source,
+                             /*const*/ apr_hash_t *left_props,
+                             /*const*/ apr_hash_t *right_props,
+                             const apr_array_header_t *prop_changes,
+                             void *dir_baton,
+                             const struct svn_diff_tree_processor_t *processor,
+                             apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   return svn_error_trace(wrapped->dir_changed(relpath,
+                                               left_source, right_source,
+                                               left_props, right_props,
+                                               prop_changes,
+                                               dir_baton,
+                                               wrapped,
+                                               scratch_pool));
+ }
+
+ /* Pass-through: forwards a directory close to the wrapped processor with
+    all arguments unchanged. */
+ static svn_error_t *
+ copy_as_changed_dir_closed(const char *relpath,
+                            const svn_diff_source_t *left_source,
+                            const svn_diff_source_t *right_source,
+                            void *dir_baton,
+                            const svn_diff_tree_processor_t *processor,
+                            apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   return svn_error_trace(wrapped->dir_closed(relpath,
+                                              left_source, right_source,
+                                              dir_baton,
+                                              wrapped,
+                                              scratch_pool));
+ }
+
+ /* Opens a file for the copy-as-changed processor.  When there is no
+    LEFT_SOURCE but a COPYFROM_SOURCE is given, the copy source is passed on
+    as the left source and the copyfrom argument is cleared, so the wrapped
+    processor does not see a copyfrom for this file. */
+ static svn_error_t *
+ copy_as_changed_file_opened(void **new_file_baton,
+                             svn_boolean_t *skip,
+                             const char *relpath,
+                             const svn_diff_source_t *left_source,
+                             const svn_diff_source_t *right_source,
+                             const svn_diff_source_t *copyfrom_source,
+                             void *dir_baton,
+                             const svn_diff_tree_processor_t *processor,
+                             apr_pool_t *result_pool,
+                             apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   if (copyfrom_source && !left_source)
+     {
+       assert(right_source != NULL);
+
+       left_source = copyfrom_source;
+       copyfrom_source = NULL;
+     }
+
+   return svn_error_trace(wrapped->file_opened(new_file_baton,
+                                               skip,
+                                               relpath,
+                                               left_source, right_source,
+                                               copyfrom_source,
+                                               dir_baton,
+                                               wrapped,
+                                               result_pool, scratch_pool));
+ }
+
+ /* Handles a file addition for the copy-as-changed processor.  An addition
+    without COPYFROM_SOURCE is forwarded unchanged as file_added.  An
+    addition with a copy source is reported as file_changed instead: the
+    copy source becomes the left side, a property diff is computed between
+    COPYFROM_PROPS and RIGHT_PROPS, and the file_modified flag is derived
+    from comparing the two files' contents. */
+ static svn_error_t *
+ copy_as_changed_file_added(const char *relpath,
+                            const svn_diff_source_t *copyfrom_source,
+                            const svn_diff_source_t *right_source,
+                            const char *copyfrom_file,
+                            const char *right_file,
+                            /*const*/ apr_hash_t *copyfrom_props,
+                            /*const*/ apr_hash_t *right_props,
+                            void *file_baton,
+                            const svn_diff_tree_processor_t *processor,
+                            apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+   apr_array_header_t *prop_diff;
+   svn_boolean_t identical = FALSE;
+
+   if (!copyfrom_source)
+     {
+       /* Plain addition: nothing to translate. */
+       return svn_error_trace(wrapped->file_added(relpath,
+                                                  copyfrom_source,
+                                                  right_source,
+                                                  copyfrom_file,
+                                                  right_file,
+                                                  copyfrom_props,
+                                                  right_props,
+                                                  file_baton,
+                                                  wrapped,
+                                                  scratch_pool));
+     }
+
+   SVN_ERR(svn_prop_diffs(&prop_diff, right_props, copyfrom_props,
+                          scratch_pool));
+
+   /* "" is sometimes a marker for just modified (E.g. no-textdeltas),
+      and it is certainly not a file.  In that case the contents are
+      reported as modified without comparing anything. */
+   if (*copyfrom_file && *right_file)
+     SVN_ERR(svn_io_files_contents_same_p(&identical, copyfrom_file,
+                                          right_file, scratch_pool));
+
+   return svn_error_trace(wrapped->file_changed(relpath,
+                                                copyfrom_source,
+                                                right_source,
+                                                copyfrom_file,
+                                                right_file,
+                                                copyfrom_props,
+                                                right_props,
+                                                !identical,
+                                                prop_diff,
+                                                file_baton,
+                                                wrapped,
+                                                scratch_pool));
+ }
+
+ /* Pass-through: forwards a file deletion to the wrapped processor with all
+    arguments unchanged. */
+ static svn_error_t *
+ copy_as_changed_file_deleted(const char *relpath,
+                              const svn_diff_source_t *left_source,
+                              const char *left_file,
+                              /*const*/ apr_hash_t *left_props,
+                              void *file_baton,
+                              const svn_diff_tree_processor_t *processor,
+                              apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   return svn_error_trace(wrapped->file_deleted(relpath,
+                                                left_source,
+                                                left_file,
+                                                left_props,
+                                                file_baton,
+                                                wrapped,
+                                                scratch_pool));
+ }
+
+ /* Pass-through: forwards a file modification to the wrapped processor with
+    all arguments unchanged. */
+ static svn_error_t *
+ copy_as_changed_file_changed(const char *relpath,
+                              const svn_diff_source_t *left_source,
+                              const svn_diff_source_t *right_source,
+                              const char *left_file,
+                              const char *right_file,
+                              /*const*/ apr_hash_t *left_props,
+                              /*const*/ apr_hash_t *right_props,
+                              svn_boolean_t file_modified,
+                              const apr_array_header_t *prop_changes,
+                              void *file_baton,
+                              const svn_diff_tree_processor_t *processor,
+                              apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   return svn_error_trace(wrapped->file_changed(relpath,
+                                                left_source, right_source,
+                                                left_file, right_file,
+                                                left_props, right_props,
+                                                file_modified,
+                                                prop_changes,
+                                                file_baton,
+                                                wrapped,
+                                                scratch_pool));
+ }
+
+ /* Pass-through: forwards a file close to the wrapped processor with all
+    arguments unchanged. */
+ static svn_error_t *
+ copy_as_changed_file_closed(const char *relpath,
+                             const svn_diff_source_t *left_source,
+                             const svn_diff_source_t *right_source,
+                             void *file_baton,
+                             const svn_diff_tree_processor_t *processor,
+                             apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   return svn_error_trace(wrapped->file_closed(relpath,
+                                               left_source, right_source,
+                                               file_baton,
+                                               wrapped,
+                                               scratch_pool));
+ }
+
+ /* Pass-through: forwards a node_absent notification to the wrapped
+    processor with all arguments unchanged. */
+ static svn_error_t *
+ copy_as_changed_node_absent(const char *relpath,
+                             void *dir_baton,
+                             const svn_diff_tree_processor_t *processor,
+                             apr_pool_t *scratch_pool)
+ {
+   struct copy_as_changed_baton_t *baton = processor->baton;
+   const svn_diff_tree_processor_t *wrapped = baton->processor;
+
+   return svn_error_trace(wrapped->node_absent(relpath, dir_baton,
+                                               wrapped, scratch_pool));
+ }
+
+
+ /* Builds a tree processor, allocated in RESULT_POOL, whose callbacks are
+    the copy_as_changed_* functions of this file and which forwards to
+    PROCESSOR. */
+ const svn_diff_tree_processor_t *
+ svn_diff__tree_processor_copy_as_changed_create(
+                         const svn_diff_tree_processor_t * processor,
+                         apr_pool_t *result_pool)
+ {
+   struct copy_as_changed_baton_t *baton = apr_pcalloc(result_pool,
+                                                       sizeof(*baton));
+   svn_diff_tree_processor_t *result;
+
+   baton->processor = processor;
+
+   result = svn_diff__tree_processor_create(baton, result_pool);
+
+   /* Directory callbacks. */
+   result->dir_opened = copy_as_changed_dir_opened;
+   result->dir_added = copy_as_changed_dir_added;
+   result->dir_deleted = copy_as_changed_dir_deleted;
+   result->dir_changed = copy_as_changed_dir_changed;
+   result->dir_closed = copy_as_changed_dir_closed;
+
+   /* File callbacks. */
+   result->file_opened = copy_as_changed_file_opened;
+   result->file_added = copy_as_changed_file_added;
+   result->file_deleted = copy_as_changed_file_deleted;
+   result->file_changed = copy_as_changed_file_changed;
+   result->file_closed = copy_as_changed_file_closed;
+
+   result->node_absent = copy_as_changed_node_absent;
+
+   return result;
+ }
+
+
+ /* Processor baton for the tee tree processor */
+ struct tee_baton_t
+ {
+ /* First processor; the tee callbacks call it before P2. */
+ const svn_diff_tree_processor_t *p1;
+ /* Second processor; the tee callbacks call it after P1 succeeded. */
+ const svn_diff_tree_processor_t *p2;
+ };
+
+ /* Wrapper baton for file and directory batons in the tee processor */
+ struct tee_node_baton_t
+ {
+ /* Node baton produced by (and passed back to) the first processor. */
+ void *baton1;
+ /* Node baton produced by (and passed back to) the second processor. */
+ void *baton2;
+ };
+
+ /* Opens a directory on both tee'd processors.  A wrapper baton holding the
+    two per-processor directory batons is allocated in RESULT_POOL and
+    returned in *NEW_DIR_BATON once both calls have succeeded.  Each
+    processor receives its own half of PARENT_DIR_BATON, or NULL when there
+    is no parent baton.
+
+    NOTE(review): both processors write to the same SKIP and SKIP_CHILDREN
+    locations, so whatever the second processor stores is what the caller
+    sees -- confirm this is the intended combination rule. */
+ static svn_error_t *
+ tee_dir_opened(void **new_dir_baton,
+                svn_boolean_t *skip,
+                svn_boolean_t *skip_children,
+                const char *relpath,
+                const svn_diff_source_t *left_source,
+                const svn_diff_source_t *right_source,
+                const svn_diff_source_t *copyfrom_source,
+                void *parent_dir_baton,
+                const svn_diff_tree_processor_t *processor,
+                apr_pool_t *result_pool,
+                apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *parent = parent_dir_baton;
+   struct tee_node_baton_t *node = apr_pcalloc(result_pool, sizeof(*node));
+   void *parent1 = parent ? parent->baton1 : NULL;
+   void *parent2 = parent ? parent->baton2 : NULL;
+
+   SVN_ERR(tee->p1->dir_opened(&(node->baton1), skip, skip_children,
+                               relpath,
+                               left_source, right_source, copyfrom_source,
+                               parent1,
+                               tee->p1,
+                               result_pool, scratch_pool));
+
+   SVN_ERR(tee->p2->dir_opened(&(node->baton2), skip, skip_children,
+                               relpath,
+                               left_source, right_source, copyfrom_source,
+                               parent2,
+                               tee->p2,
+                               result_pool, scratch_pool));
+
+   *new_dir_baton = node;
+
+   return SVN_NO_ERROR;
+ }
+
+ /* Reports a directory addition to both tee'd processors, first P1 then P2,
+    each with its own half of the wrapper DIR_BATON.  P2 is not called if P1
+    returns an error. */
+ static svn_error_t *
+ tee_dir_added(const char *relpath,
+               const svn_diff_source_t *copyfrom_source,
+               const svn_diff_source_t *right_source,
+               /*const*/ apr_hash_t *copyfrom_props,
+               /*const*/ apr_hash_t *right_props,
+               void *dir_baton,
+               const svn_diff_tree_processor_t *processor,
+               apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = dir_baton;
+
+   SVN_ERR(tee->p1->dir_added(relpath,
+                              copyfrom_source, right_source,
+                              copyfrom_props, right_props,
+                              node->baton1,
+                              tee->p1,
+                              scratch_pool));
+
+   return svn_error_trace(tee->p2->dir_added(relpath,
+                                             copyfrom_source, right_source,
+                                             copyfrom_props, right_props,
+                                             node->baton2,
+                                             tee->p2,
+                                             scratch_pool));
+ }
+
+ /* Reports a directory deletion to both tee'd processors, first P1 then P2,
+    each with its own half of the wrapper DIR_BATON.  P2 is not called if P1
+    returns an error. */
+ static svn_error_t *
+ tee_dir_deleted(const char *relpath,
+                 const svn_diff_source_t *left_source,
+                 /*const*/ apr_hash_t *left_props,
+                 void *dir_baton,
+                 const svn_diff_tree_processor_t *processor,
+                 apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = dir_baton;
+
+   SVN_ERR(tee->p1->dir_deleted(relpath,
+                                left_source, left_props,
+                                node->baton1,
+                                tee->p1,
+                                scratch_pool));
+
+   return svn_error_trace(tee->p2->dir_deleted(relpath,
+                                               left_source, left_props,
+                                               node->baton2,
+                                               tee->p2,
+                                               scratch_pool));
+ }
+
+ /* Reports a directory property change to both tee'd processors, first P1
+    then P2, each with its own half of the wrapper DIR_BATON.  P2 is not
+    called if P1 returns an error. */
+ static svn_error_t *
+ tee_dir_changed(const char *relpath,
+                 const svn_diff_source_t *left_source,
+                 const svn_diff_source_t *right_source,
+                 /*const*/ apr_hash_t *left_props,
+                 /*const*/ apr_hash_t *right_props,
+                 const apr_array_header_t *prop_changes,
+                 void *dir_baton,
+                 const struct svn_diff_tree_processor_t *processor,
+                 apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = dir_baton;
+
+   SVN_ERR(tee->p1->dir_changed(relpath,
+                                left_source, right_source,
+                                left_props, right_props,
+                                prop_changes,
+                                node->baton1,
+                                tee->p1,
+                                scratch_pool));
+
+   return svn_error_trace(tee->p2->dir_changed(relpath,
+                                               left_source, right_source,
+                                               left_props, right_props,
+                                               prop_changes,
+                                               node->baton2,
+                                               tee->p2,
+                                               scratch_pool));
+ }
+
+ /* Closes a directory on both tee'd processors, first P1 then P2, each with
+    its own half of the wrapper DIR_BATON.  P2 is not called if P1 returns
+    an error. */
+ static svn_error_t *
+ tee_dir_closed(const char *relpath,
+                const svn_diff_source_t *left_source,
+                const svn_diff_source_t *right_source,
+                void *dir_baton,
+                const svn_diff_tree_processor_t *processor,
+                apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = dir_baton;
+
+   SVN_ERR(tee->p1->dir_closed(relpath,
+                               left_source, right_source,
+                               node->baton1,
+                               tee->p1,
+                               scratch_pool));
+
+   return svn_error_trace(tee->p2->dir_closed(relpath,
+                                              left_source, right_source,
+                                              node->baton2,
+                                              tee->p2,
+                                              scratch_pool));
+ }
+
+ /* Opens a file on both tee'd processors.  A wrapper baton holding the two
+    per-processor file batons is allocated in RESULT_POOL and returned in
+    *NEW_FILE_BATON once both calls have succeeded.  Each processor receives
+    its own half of the parent DIR_BATON, or NULL when there is none.
+
+    NOTE(review): both processors write to the same SKIP location, so the
+    value stored by the second processor is what the caller sees -- confirm
+    this is the intended combination rule. */
+ static svn_error_t *
+ tee_file_opened(void **new_file_baton,
+                 svn_boolean_t *skip,
+                 const char *relpath,
+                 const svn_diff_source_t *left_source,
+                 const svn_diff_source_t *right_source,
+                 const svn_diff_source_t *copyfrom_source,
+                 void *dir_baton,
+                 const svn_diff_tree_processor_t *processor,
+                 apr_pool_t *result_pool,
+                 apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *parent = dir_baton;
+   struct tee_node_baton_t *node = apr_pcalloc(result_pool, sizeof(*node));
+   void *parent1 = parent ? parent->baton1 : NULL;
+   void *parent2 = parent ? parent->baton2 : NULL;
+
+   SVN_ERR(tee->p1->file_opened(&(node->baton1), skip,
+                                relpath,
+                                left_source, right_source, copyfrom_source,
+                                parent1,
+                                tee->p1,
+                                result_pool, scratch_pool));
+
+   SVN_ERR(tee->p2->file_opened(&(node->baton2), skip,
+                                relpath,
+                                left_source, right_source, copyfrom_source,
+                                parent2,
+                                tee->p2,
+                                result_pool, scratch_pool));
+
+   *new_file_baton = node;
+
+   return SVN_NO_ERROR;
+ }
+
+ /* Reports a file addition to both tee'd processors, first P1 then P2, each
+    with its own half of the wrapper FILE_BATON.  P2 is not called if P1
+    returns an error. */
+ static svn_error_t *
+ tee_file_added(const char *relpath,
+                const svn_diff_source_t *copyfrom_source,
+                const svn_diff_source_t *right_source,
+                const char *copyfrom_file,
+                const char *right_file,
+                /*const*/ apr_hash_t *copyfrom_props,
+                /*const*/ apr_hash_t *right_props,
+                void *file_baton,
+                const svn_diff_tree_processor_t *processor,
+                apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = file_baton;
+
+   SVN_ERR(tee->p1->file_added(relpath,
+                               copyfrom_source, right_source,
+                               copyfrom_file, right_file,
+                               copyfrom_props, right_props,
+                               node->baton1,
+                               tee->p1,
+                               scratch_pool));
+
+   return svn_error_trace(tee->p2->file_added(relpath,
+                                              copyfrom_source, right_source,
+                                              copyfrom_file, right_file,
+                                              copyfrom_props, right_props,
+                                              node->baton2,
+                                              tee->p2,
+                                              scratch_pool));
+ }
+
+ /* Reports a file deletion to both tee'd processors, first P1 then P2, each
+    with its own half of the wrapper FILE_BATON.  P2 is not called if P1
+    returns an error. */
+ static svn_error_t *
+ tee_file_deleted(const char *relpath,
+                  const svn_diff_source_t *left_source,
+                  const char *left_file,
+                  /*const*/ apr_hash_t *left_props,
+                  void *file_baton,
+                  const svn_diff_tree_processor_t *processor,
+                  apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = file_baton;
+
+   SVN_ERR(tee->p1->file_deleted(relpath,
+                                 left_source, left_file, left_props,
+                                 node->baton1,
+                                 tee->p1,
+                                 scratch_pool));
+
+   return svn_error_trace(tee->p2->file_deleted(relpath,
+                                                left_source, left_file,
+                                                left_props,
+                                                node->baton2,
+                                                tee->p2,
+                                                scratch_pool));
+ }
+
+ /* Reports a file modification to both tee'd processors, first P1 then P2,
+    each with its own half of the wrapper FILE_BATON.  P2 is not called if
+    P1 returns an error. */
+ static svn_error_t *
+ tee_file_changed(const char *relpath,
+                  const svn_diff_source_t *left_source,
+                  const svn_diff_source_t *right_source,
+                  const char *left_file,
+                  const char *right_file,
+                  /*const*/ apr_hash_t *left_props,
+                  /*const*/ apr_hash_t *right_props,
+                  svn_boolean_t file_modified,
+                  const apr_array_header_t *prop_changes,
+                  void *file_baton,
+                  const svn_diff_tree_processor_t *processor,
+                  apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = file_baton;
+
+   SVN_ERR(tee->p1->file_changed(relpath,
+                                 left_source, right_source,
+                                 left_file, right_file,
+                                 left_props, right_props,
+                                 file_modified,
+                                 prop_changes,
+                                 node->baton1,
+                                 tee->p1,
+                                 scratch_pool));
+
+   return svn_error_trace(tee->p2->file_changed(relpath,
+                                                left_source, right_source,
+                                                left_file, right_file,
+                                                left_props, right_props,
+                                                file_modified,
+                                                prop_changes,
+                                                node->baton2,
+                                                tee->p2,
+                                                scratch_pool));
+ }
+
+ /* Closes a file on both tee'd processors, first P1 then P2, each with its
+    own half of the wrapper FILE_BATON.  P2 is not called if P1 returns an
+    error. */
+ static svn_error_t *
+ tee_file_closed(const char *relpath,
+                 const svn_diff_source_t *left_source,
+                 const svn_diff_source_t *right_source,
+                 void *file_baton,
+                 const svn_diff_tree_processor_t *processor,
+                 apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = file_baton;
+
+   SVN_ERR(tee->p1->file_closed(relpath,
+                                left_source, right_source,
+                                node->baton1,
+                                tee->p1,
+                                scratch_pool));
+
+   return svn_error_trace(tee->p2->file_closed(relpath,
+                                               left_source, right_source,
+                                               node->baton2,
+                                               tee->p2,
+                                               scratch_pool));
+ }
+
+ /* Reports an absent node to both tee'd processors, first P1 then P2.  Each
+    receives its own half of the wrapper DIR_BATON, or NULL when DIR_BATON
+    itself is NULL.  P2 is not called if P1 returns an error. */
+ static svn_error_t *
+ tee_node_absent(const char *relpath,
+                 void *dir_baton,
+                 const svn_diff_tree_processor_t *processor,
+                 apr_pool_t *scratch_pool)
+ {
+   struct tee_baton_t *tee = processor->baton;
+   struct tee_node_baton_t *node = dir_baton;
+   void *dir1 = node ? node->baton1 : NULL;
+   void *dir2 = node ? node->baton2 : NULL;
+
+   SVN_ERR(tee->p1->node_absent(relpath, dir1, tee->p1, scratch_pool));
+
+   return svn_error_trace(tee->p2->node_absent(relpath, dir2, tee->p2,
+                                               scratch_pool));
+ }
+
+ /* Builds a tree processor, allocated in RESULT_POOL, whose callbacks are
+    the tee_* functions of this file.  Those callbacks invoke PROCESSOR1 and
+    then PROCESSOR2 for every event. */
+ const svn_diff_tree_processor_t *
+ svn_diff__tree_processor_tee_create(const svn_diff_tree_processor_t *processor1,
+                                     const svn_diff_tree_processor_t *processor2,
+                                     apr_pool_t *result_pool)
+ {
+   struct tee_baton_t *baton = apr_pcalloc(result_pool, sizeof(*baton));
+   svn_diff_tree_processor_t *result;
+
+   baton->p1 = processor1;
+   baton->p2 = processor2;
+
+   result = svn_diff__tree_processor_create(baton, result_pool);
+
+   /* Directory callbacks. */
+   result->dir_opened = tee_dir_opened;
+   result->dir_added = tee_dir_added;
+   result->dir_deleted = tee_dir_deleted;
+   result->dir_changed = tee_dir_changed;
+   result->dir_closed = tee_dir_closed;
+
+   /* File callbacks. */
+   result->file_opened = tee_file_opened;
+   result->file_added = tee_file_added;
+   result->file_deleted = tee_file_deleted;
+   result->file_changed = tee_file_changed;
+   result->file_closed = tee_file_closed;
+
+   result->node_absent = tee_node_absent;
+
+   return result;
+ }
+
+ /* Allocates a zero-filled svn_diff_source_t in RESULT_POOL, stores REVISION
+    in its revision field and returns it. */
+ svn_diff_source_t *
+ svn_diff__source_create(svn_revnum_t revision,
+                         apr_pool_t *result_pool)
+ {
+   svn_diff_source_t *source;
+
+   source = apr_pcalloc(result_pool, sizeof(*source));
+   source->revision = revision;
+
+   return source;
+ }
diff --git a/subversion/libsvn_diff/lcs.c b/subversion/libsvn_diff/lcs.c
new file mode 100644
index 000000000000..8087a92f5c01
--- /dev/null
+++ b/subversion/libsvn_diff/lcs.c
@@ -0,0 +1,375 @@
+/*
+ * lcs.c : routines for creating an lcs
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#include <apr.h>
+#include <apr_pools.h>
+#include <apr_general.h>
+
+#include "diff.h"
+
+
+/*
+ * Calculate the Longest Common Subsequence (LCS) between two datasources.
+ * This function is what makes the diff code tick.
+ *
+ * The LCS algorithm implemented here is based on the approach described
+ * by Sun Wu, Udi Manber, Gene Myers and Webb Miller in "An O(NP) Sequence
+ * Comparison Algorithm", but has been modified for better performance.
+ *
+ * Let M and N be the lengths (number of tokens) of the two sources
+ * ('files'). The goal is to reach the end of both sources (files) with the
+ * minimum number of insertions + deletions. Since there is a known length
+ * difference N-M between the files, that is equivalent to just the minimum
+ * number of deletions, or equivalently the minimum number of insertions.
+ * For symmetry, we use the lesser number - deletions if M<N, insertions if
+ * M>N.
+ *
+ * Let 'k' be the difference in remaining length between the files, i.e.
+ * if we're at the beginning of both files, k=N-M, whereas k=0 for the
+ * 'end state', at the end of both files. An insertion will increase k by
+ * one, while a deletion decreases k by one. If k<0, then insertions are
+ * 'free' - we need those to reach the end state k=0 anyway - but deletions
+ * are costly: Adding a deletion means that we will have to add an additional
+ * insertion later to reach the end state, so it doesn't matter if we count
+ * deletions or insertions. Similarly, deletions are free for k>0.
+ *
+ * Let a 'state' be a given position in each file {pos1, pos2}. An array
+ * 'fp' keeps track of the best possible state (largest values of
+ * {pos1, pos2}) that can be achieved for a given cost 'p' (# moves away
+ * from k=0), as well as a linked list of what matches were used to reach
+ * that state. For each new value of p, we find for each value of k the
+ * best achievable state for that k - either by doing a costly operation
+ * (deletion if k<0) from a state achieved at a lower p, or doing a free
+ * operation (insertion if k<0) from a state achieved at the same p -
+ * and in both cases advancing past any matching regions found. This is
+ * handled by running loops over k in order of descending absolute value.
+ *
+ * A recent improvement of the algorithm is to ignore tokens that are unique
+ * to one file or the other, as those are known from the start to be
+ * impossible to match.
+ */
+
+/* One slot of the 'fp' (furthest point) array used by svn_diff__lcs(). */
+typedef struct svn_diff__snake_t
+{
+  /* Offset of position[1] as last recorded by svn_diff__snake(). */
+  apr_off_t y;
+
+  /* Head of the chain of matches used to reach this state. */
+  svn_diff__lcs_t *lcs;
+
+  /* Current position in the first ([0]) and second ([1]) position list. */
+  svn_diff__position_t *position[2];
+} svn_diff__snake_t;
+
+static APR_INLINE void
+svn_diff__snake(svn_diff__snake_t *fp_k,
+ svn_diff__token_index_t *token_counts[2],
+ svn_diff__lcs_t **freelist,
+ apr_pool_t *pool)
+{
+ svn_diff__position_t *start_position[2];
+ svn_diff__position_t *position[2];
+ svn_diff__lcs_t *lcs;
+ svn_diff__lcs_t *previous_lcs;
+
+ /* Recompute the state stored in fp_k[0] from its two neighbours
+ * fp_k[-1] and fp_k[1], which the caller must keep addressable.
+ *
+ * The previous entry at fp[k] is going to be replaced. See if we
+ * can mark that lcs node for reuse, because the sequence up to this
+ * point was a dead end.
+ *
+ * Walk the old chain from its head: drop one reference from each node
+ * and stop at the first node that is still referenced elsewhere. Every
+ * node whose count reaches zero is pushed onto *FREELIST.
+ */
+ lcs = fp_k[0].lcs;
+ while (lcs)
+ {
+ lcs->refcount--;
+ if (lcs->refcount)
+ break;
+
+ previous_lcs = lcs->next;
+ lcs->next = *freelist;
+ *freelist = lcs;
+ lcs = previous_lcs;
+ }
+
+ if (fp_k[-1].y >= fp_k[1].y)
+ {
+ start_position[0] = fp_k[-1].position[0];
+ start_position[1] = fp_k[-1].position[1]->next;
+
+ previous_lcs = fp_k[-1].lcs;
+ }
+ else
+ {
+ start_position[0] = fp_k[1].position[0]->next;
+ start_position[1] = fp_k[1].position[1];
+
+ previous_lcs = fp_k[1].lcs;
+ }
+
+
+ if (previous_lcs)
+ {
+ previous_lcs->refcount++;
+ }
+
+ /* At this point START_POSITION is one step past the neighbour with the
+ * larger y (ties go to fp_k[-1]): one step in list 1 when coming from
+ * fp_k[-1], one step in list 0 when coming from fp_k[1]. PREVIOUS_LCS
+ * holds a new reference to that neighbour's chain, if it has one.
+ *
+ * ### Optimization, skip all positions that don't have matchpoints
+ * ### anyway. Beware of the sentinel, don't skip it!
+ */
+
+ position[0] = start_position[0];
+ position[1] = start_position[1];
+
+ while (1)
+ {
+ while (position[0]->token_index == position[1]->token_index)
+ {
+ position[0] = position[0]->next;
+ position[1] = position[1]->next;
+ }
+
+ if (position[1] != start_position[1])
+ {
+ lcs = *freelist;
+ if (lcs)
+ {
+ *freelist = lcs->next;
+ }
+ else
+ {
+ lcs = apr_palloc(pool, sizeof(*lcs));
+ }
+
+ lcs->position[0] = start_position[0];
+ lcs->position[1] = start_position[1];
+ lcs->length = position[1]->offset - start_position[1]->offset;
+ lcs->next = previous_lcs;
+ lcs->refcount = 1;
+ previous_lcs = lcs;
+ start_position[0] = position[0];
+ start_position[1] = position[1];
+ }
+
+ /* Skip any and all tokens that only occur in one of the files.
+ * A token at position[0] is skipped when its count in the other
+ * file, token_counts[1], is zero, and vice versa. Positions with a
+ * negative token_index fail the ">= 0" tests, so they are never
+ * skipped and never used to index TOKEN_COUNTS. After a skip the
+ * matching loop above runs again from the new position. */
+ if (position[0]->token_index >= 0
+ && token_counts[1][position[0]->token_index] == 0)
+ start_position[0] = position[0] = position[0]->next;
+ else if (position[1]->token_index >= 0
+ && token_counts[0][position[1]->token_index] == 0)
+ start_position[1] = position[1] = position[1]->next;
+ else
+ break;
+ }
+
+ fp_k[0].lcs = previous_lcs;
+ fp_k[0].position[0] = position[0];
+ fp_k[0].position[1] = position[1];
+
+ fp_k[0].y = position[1]->offset;
+}
+
+
+/* Reverse the singly linked chain LCS in place by flipping every 'next'
+ * pointer.  Return the new head (the former tail), or NULL if LCS is
+ * NULL. */
+static svn_diff__lcs_t *
+svn_diff__lcs_reverse(svn_diff__lcs_t *lcs)
+{
+  svn_diff__lcs_t *reversed = NULL;
+
+  while (lcs)
+    {
+      svn_diff__lcs_t *rest = lcs->next;
+
+      lcs->next = reversed;
+      reversed = lcs;
+      lcs = rest;
+    }
+
+  return reversed;
+}
+
+
+/* Allocate in POOL a new lcs chunk of LINES lines that starts at offset
+ * POS0_OFFSET in the first source and POS1_OFFSET in the second, link it
+ * in front of the chain LCS and return it as the new chain head.
+ * The two positions are freshly zero-allocated; only their 'offset'
+ * member is filled in.  LINES must be > 0. */
+static svn_diff__lcs_t *
+prepend_lcs(svn_diff__lcs_t *lcs, apr_off_t lines,
+            apr_off_t pos0_offset, apr_off_t pos1_offset,
+            apr_pool_t *pool)
+{
+  svn_diff__lcs_t *chunk;
+  svn_diff__position_t *pos0;
+  svn_diff__position_t *pos1;
+
+  SVN_ERR_ASSERT_NO_RETURN(lines > 0);
+
+  chunk = apr_palloc(pool, sizeof(*chunk));
+
+  pos0 = apr_pcalloc(pool, sizeof(*pos0));
+  pos0->offset = pos0_offset;
+
+  pos1 = apr_pcalloc(pool, sizeof(*pos1));
+  pos1->offset = pos1_offset;
+
+  chunk->position[0] = pos0;
+  chunk->position[1] = pos1;
+  chunk->length = lines;
+  chunk->refcount = 1;
+  chunk->next = lcs;
+
+  return chunk;
+}
+
+
+svn_diff__lcs_t *
+svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
+ svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+ svn_diff__token_index_t *token_counts_list1, /* array of counts */
+ svn_diff__token_index_t *token_counts_list2, /* array of counts */
+ svn_diff__token_index_t num_tokens,
+ apr_off_t prefix_lines,
+ apr_off_t suffix_lines,
+ apr_pool_t *pool)
+{
+ apr_off_t length[2];
+ svn_diff__token_index_t *token_counts[2];
+ svn_diff__token_index_t unique_count[2];
+ svn_diff__token_index_t token_index;
+ svn_diff__snake_t *fp;
+ apr_off_t d;
+ apr_off_t k;
+ apr_off_t p = 0;
+ svn_diff__lcs_t *lcs, *lcs_freelist = NULL;
+
+ svn_diff__position_t sentinel_position[2];
+
+ /* Since EOF is always a sync point we tack on an EOF link
+ * with sentinel positions.
+ *
+ * The link has length 0. Its offsets are one past the last line of
+ * each source: the tail offset plus SUFFIX_LINES + 1, or
+ * PREFIX_LINES + SUFFIX_LINES + 1 when that source has no position
+ * list at all.
+ */
+ lcs = apr_palloc(pool, sizeof(*lcs));
+ lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0]));
+ lcs->position[0]->offset = position_list1
+ ? position_list1->offset + suffix_lines + 1
+ : prefix_lines + suffix_lines + 1;
+ lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1]));
+ lcs->position[1]->offset = position_list2
+ ? position_list2->offset + suffix_lines + 1
+ : prefix_lines + suffix_lines + 1;
+ lcs->length = 0;
+ lcs->refcount = 1;
+ lcs->next = NULL;
+
+ if (position_list1 == NULL || position_list2 == NULL)
+ {
+ if (suffix_lines)
+ lcs = prepend_lcs(lcs, suffix_lines,
+ lcs->position[0]->offset - suffix_lines,
+ lcs->position[1]->offset - suffix_lines,
+ pool);
+ if (prefix_lines)
+ lcs = prepend_lcs(lcs, prefix_lines, 1, 1, pool);
+
+ return lcs;
+ }
+
+ unique_count[1] = unique_count[0] = 0;
+ for (token_index = 0; token_index < num_tokens; token_index++)
+ {
+ if (token_counts_list1[token_index] == 0)
+ unique_count[1] += token_counts_list2[token_index];
+ if (token_counts_list2[token_index] == 0)
+ unique_count[0] += token_counts_list1[token_index];
+ }
+
+ /* Calculate lengths M and N of the sequences to be compared. Do not
+ * count tokens unique to one file, as those are ignored in __snake.
+ * Each length is the tail offset minus the head offset
+ * (position_list->next is the head of the ring) plus one.
+ */
+ length[0] = position_list1->offset - position_list1->next->offset + 1
+ - unique_count[0];
+ length[1] = position_list2->offset - position_list2->next->offset + 1
+ - unique_count[1];
+
+ /* strikerXXX: here we allocate the furthest point array, which is
+ * strikerXXX: sized M + N + 3 (!)
+ */
+ fp = apr_pcalloc(pool,
+ sizeof(*fp) * (apr_size_t)(length[0] + length[1] + 3));
+
+ /* The origin of fp corresponds to the end state, where we are
+ * at the end of both files. The valid states thus span from
+ * -N (at end of first file and at the beginning of the second
+ * file) to +M (the opposite :). Finally, svn_diff__snake needs
+ * 1 extra slot on each side to work.
+ */
+ fp += length[1] + 1;
+
+ sentinel_position[0].next = position_list1->next;
+ position_list1->next = &sentinel_position[0];
+ sentinel_position[0].offset = position_list1->offset + 1;
+ token_counts[0] = token_counts_list1;
+
+ sentinel_position[1].next = position_list2->next;
+ position_list2->next = &sentinel_position[1];
+ sentinel_position[1].offset = position_list2->offset + 1;
+ token_counts[1] = token_counts_list2;
+
+ /* Negative indices will not be used elsewhere. The two sentinels get
+ * different values so that they never compare equal to each other.
+ * They were spliced into the rings right after each tail above and are
+ * unlinked again before this function returns.
+ */
+ sentinel_position[0].token_index = -1;
+ sentinel_position[1].token_index = -2;
+
+ /* position d = M - N corresponds to the initial state, where
+ * we are at the beginning of both files.
+ */
+ d = length[0] - length[1];
+
+ /* k = d - 1 will be the first to be used to get previous
+ * position information from, make sure it holds sane
+ * data
+ */
+ fp[d - 1].position[0] = sentinel_position[0].next;
+ fp[d - 1].position[1] = &sentinel_position[1];
+
+ p = 0;
+ do
+ {
+ /* For k < 0, insertions are free */
+ for (k = (d < 0 ? d : 0) - p; k < 0; k++)
+ {
+ svn_diff__snake(fp + k, token_counts, &lcs_freelist, pool);
+ }
+ /* for k > 0, deletions are free */
+ for (k = (d > 0 ? d : 0) + p; k >= 0; k--)
+ {
+ svn_diff__snake(fp + k, token_counts, &lcs_freelist, pool);
+ }
+
+ p++;
+ }
+ while (fp[0].position[1] != &sentinel_position[1]);
+
+ if (suffix_lines)
+ lcs->next = prepend_lcs(fp[0].lcs, suffix_lines,
+ lcs->position[0]->offset - suffix_lines,
+ lcs->position[1]->offset - suffix_lines,
+ pool);
+ else
+ lcs->next = fp[0].lcs;
+
+ lcs = svn_diff__lcs_reverse(lcs);
+
+ position_list1->next = sentinel_position[0].next;
+ position_list2->next = sentinel_position[1].next;
+
+ if (prefix_lines)
+ return prepend_lcs(lcs, prefix_lines, 1, 1, pool);
+ else
+ return lcs;
+}
diff --git a/subversion/libsvn_diff/parse-diff.c b/subversion/libsvn_diff/parse-diff.c
new file mode 100644
index 000000000000..a01b4d52743b
--- /dev/null
+++ b/subversion/libsvn_diff/parse-diff.c
@@ -0,0 +1,1373 @@
+/*
+ * parse-diff.c: functions for parsing diff files
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "svn_hash.h"
+#include "svn_types.h"
+#include "svn_error.h"
+#include "svn_io.h"
+#include "svn_pools.h"
+#include "svn_props.h"
+#include "svn_string.h"
+#include "svn_utf.h"
+#include "svn_dirent_uri.h"
+#include "svn_diff.h"
+
+#include "private/svn_eol_private.h"
+#include "private/svn_dep_compat.h"
+
+/* Helper macro for readability: true if the C string STR begins with the
+ * C string START. Note that START is evaluated twice. */
+#define starts_with(str, start) \
+ (strncmp((str), (start), strlen(start)) == 0)
+
+/* Like strlen() but for string literals: sizeof() minus the terminating
+ * NUL. Do not use it on a char pointer, where sizeof() yields the
+ * pointer size instead. */
+#define STRLEN_LITERAL(str) (sizeof(str) - 1)
+
+/* This struct describes a range within a file, as well as the
+ * current cursor position within the range. All numbers are in bytes.
+ * START is where the cursor is put on reset; the readers in this file
+ * report EOF once CURRENT >= END, so END is the first byte after the
+ * range. */
+struct svn_diff__hunk_range {
+ apr_off_t start;
+ apr_off_t end;
+ apr_off_t current;
+};
+
+struct svn_diff_hunk_t {
+ /* The patch this hunk belongs to. Its 'reverse' flag decides whether
+ * the accessors in this file swap the original and modified sides. */
+ svn_patch_t *patch;
+
+ /* APR file handle to the patch file this hunk came from. */
+ apr_file_t *apr_file;
+
+ /* Ranges used to keep track of this hunk's texts positions within
+ * the patch file. parse_next_hunk() gives all three ranges the same
+ * start; the original and modified ranges may end before the diff
+ * text range does. */
+ struct svn_diff__hunk_range diff_text_range;
+ struct svn_diff__hunk_range original_text_range;
+ struct svn_diff__hunk_range modified_text_range;
+
+ /* Hunk ranges as they appeared in the patch file.
+ * All numbers are lines, not bytes. */
+ svn_linenum_t original_start;
+ svn_linenum_t original_length;
+ svn_linenum_t modified_start;
+ svn_linenum_t modified_length;
+
+ /* Number of lines of leading and trailing hunk context. */
+ svn_linenum_t leading_context;
+ svn_linenum_t trailing_context;
+};
+
+/* Move the diff-text cursor of HUNK back to the start of its range. */
+void
+svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
+{
+  struct svn_diff__hunk_range *range = &hunk->diff_text_range;
+
+  range->current = range->start;
+}
+
+/* Move the original-text cursor of HUNK back to the start of its range.
+ * For a reversed patch the stored modified text plays the role of the
+ * original text. */
+void
+svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
+{
+  struct svn_diff__hunk_range *range;
+
+  if (hunk->patch->reverse)
+    range = &hunk->modified_text_range;
+  else
+    range = &hunk->original_text_range;
+
+  range->current = range->start;
+}
+
+/* Move the modified-text cursor of HUNK back to the start of its range.
+ * For a reversed patch the stored original text plays the role of the
+ * modified text. */
+void
+svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
+{
+  struct svn_diff__hunk_range *range;
+
+  if (hunk->patch->reverse)
+    range = &hunk->original_text_range;
+  else
+    range = &hunk->modified_text_range;
+
+  range->current = range->start;
+}
+
+/* Return the line at which the original text of HUNK starts; for a
+ * reversed patch this is the stored modified start. */
+svn_linenum_t
+svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
+{
+  if (hunk->patch->reverse)
+    return hunk->modified_start;
+
+  return hunk->original_start;
+}
+
+/* Return the number of lines in the original text of HUNK; for a
+ * reversed patch this is the stored modified length. */
+svn_linenum_t
+svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
+{
+  if (hunk->patch->reverse)
+    return hunk->modified_length;
+
+  return hunk->original_length;
+}
+
+/* Return the line at which the modified text of HUNK starts; for a
+ * reversed patch this is the stored original start. */
+svn_linenum_t
+svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
+{
+  if (hunk->patch->reverse)
+    return hunk->original_start;
+
+  return hunk->modified_start;
+}
+
+/* Return the number of lines in the modified text of HUNK; for a
+ * reversed patch this is the stored original length. */
+svn_linenum_t
+svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
+{
+  if (hunk->patch->reverse)
+    return hunk->original_length;
+
+  return hunk->modified_length;
+}
+
+/* Return the number of leading context lines recorded for HUNK.
+ * Unlike the start/length getters, this does not look at the patch's
+ * 'reverse' flag. */
+svn_linenum_t
+svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
+{
+  const svn_linenum_t context_lines = hunk->leading_context;
+
+  return context_lines;
+}
+
+/* Return the number of trailing context lines recorded for HUNK.
+ * Unlike the start/length getters, this does not look at the patch's
+ * 'reverse' flag. */
+svn_linenum_t
+svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
+{
+  const svn_linenum_t context_lines = hunk->trailing_context;
+
+  return context_lines;
+}
+
+/* Parse the string NUMBER as a decimal line number in the range
+ * 0 .. SVN_LINENUM_MAX_VALUE (zero is accepted).  On success store the
+ * value in *OFFSET and return TRUE.  On failure clear the parse error,
+ * leave *OFFSET untouched and return FALSE. */
+static svn_boolean_t
+parse_offset(svn_linenum_t *offset, const char *number)
+{
+  apr_uint64_t parsed;
+  svn_error_t *parse_err = svn_cstring_strtoui64(&parsed, number, 0,
+                                                 SVN_LINENUM_MAX_VALUE, 10);
+
+  if (parse_err == NULL)
+    {
+      *offset = (svn_linenum_t)parsed;
+      return TRUE;
+    }
+
+  svn_error_clear(parse_err);
+  return FALSE;
+}
+
+/* Parse a hunk range specification of the form "START" or "START,LENGTH"
+ * from the string RANGE.  Store the results in *START and *LENGTH and
+ * return TRUE on success; *LENGTH defaults to 1 when RANGE has no comma.
+ * Return FALSE for an empty string, a comma with nothing after it, or a
+ * number that does not parse.
+ * Note: when a valid length follows the comma, the comma in RANGE is
+ * overwritten with a NUL byte. */
+static svn_boolean_t
+parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
+{
+  char *separator;
+
+  if (range[0] == '\0')
+    return FALSE;
+
+  separator = strchr(range, ',');
+  if (separator == NULL)
+    {
+      /* No explicit length: the range covers a single line. */
+      *length = 1;
+    }
+  else
+    {
+      /* A comma but no length? */
+      if (separator[1] == '\0')
+        return FALSE;
+
+      /* Try to parse the length. */
+      if (! parse_offset(length, separator + 1))
+        return FALSE;
+
+      /* Terminate the string at the comma, so that the start line
+       * number can be parsed from the front of RANGE below. */
+      *separator = '\0';
+    }
+
+  /* Try to parse the line number the hunk starts at. */
+  return parse_offset(start, range);
+}
+
+/* Try to parse a hunk header in string HEADER, putting parsed information
+ * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
+ * character string used to delimit the hunk header.
+ *
+ * The accepted shape is "ATAT -<range> +<range> ATAT", optionally
+ * followed by arbitrary text, where each <range> is what parse_range()
+ * accepts. HEADER must start with ATAT; any other line (including one
+ * shorter than ATAT) is rejected without looking past its end.
+ *
+ * On failure HUNK may already have had its original_* fields modified.
+ * Do all allocations in POOL. */
+static svn_boolean_t
+parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
+                  const char *atat, apr_pool_t *pool)
+{
+  const char *p;
+  const char *start;
+  svn_stringbuf_t *range;
+
+  /* Not every caller has checked the leading delimiter.  Without this
+   * test, HEADER + strlen(ATAT) could point past the end of a short
+   * line, and any line that merely looks like a header from its third
+   * character on would be accepted. */
+  if (! starts_with(header, atat))
+    return FALSE;
+
+  p = header + strlen(atat);
+  if (*p != ' ')
+    /* No. */
+    return FALSE;
+  p++;
+  if (*p != '-')
+    /* Nah... */
+    return FALSE;
+  /* OK, this may be worth allocating some memory for... */
+  range = svn_stringbuf_create_ensure(31, pool);
+  start = ++p;
+  while (*p && *p != ' ')
+    {
+      p++;
+    }
+
+  if (*p != ' ')
+    /* No no no... */
+    return FALSE;
+
+  svn_stringbuf_appendbytes(range, start, p - start);
+
+  /* Try to parse the first range. */
+  if (! parse_range(&hunk->original_start, &hunk->original_length,
+                    range->data))
+    return FALSE;
+
+  /* Clear the stringbuf so we can reuse it for the second range. */
+  svn_stringbuf_setempty(range);
+  p++;
+  if (*p != '+')
+    /* Eeek! */
+    return FALSE;
+  /* OK, this may be worth copying... */
+  start = ++p;
+  while (*p && *p != ' ')
+    {
+      p++;
+    }
+  if (*p != ' ')
+    /* No no no... */
+    return FALSE;
+
+  svn_stringbuf_appendbytes(range, start, p - start);
+
+  /* Check for trailing @@ */
+  p++;
+  if (! starts_with(p, atat))
+    return FALSE;
+
+  /* There may be stuff like C-function names after the trailing @@,
+   * but we ignore that. */
+
+  /* Try to parse the second range. */
+  if (! parse_range(&hunk->modified_start, &hunk->modified_length,
+                    range->data))
+    return FALSE;
+
+  /* Hunk header is good. */
+  return TRUE;
+}
+
+/* Read a line of original or modified hunk text from the specified
+ * RANGE within FILE. FILE is expected to contain unidiff text.
+ * Leading unidiff symbols ('+', '-', and ' ') are removed from the line,
+ * Any lines commencing with the VERBOTEN character are discarded.
+ * VERBOTEN should be '+' or '-', depending on which form of hunk text
+ * is being read. Lines commencing with a backslash are discarded too.
+ *
+ * RANGE->current is advanced past every line read, including discarded
+ * ones. The seek position FILE had on entry is restored before a
+ * successful return.
+ *
+ * All other parameters are as in svn_diff_hunk_readline_original_text()
+ * and svn_diff_hunk_readline_modified_text().
+ */
+static svn_error_t *
+hunk_readline_original_or_modified(apr_file_t *file,
+ struct svn_diff__hunk_range *range,
+ svn_stringbuf_t **stringbuf,
+ const char **eol,
+ svn_boolean_t *eof,
+ char verboten,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ apr_size_t max_len;
+ svn_boolean_t filtered;
+ apr_off_t pos;
+ svn_stringbuf_t *str;
+
+ if (range->current >= range->end)
+ {
+ /* We're past the range. Indicate that no bytes can be read. */
+ *eof = TRUE;
+ if (eol)
+ *eol = NULL;
+ *stringbuf = svn_stringbuf_create_empty(result_pool);
+ return SVN_NO_ERROR;
+ }
+
+ pos = 0;
+ SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos, scratch_pool));
+ SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
+ do
+ {
+ max_len = range->end - range->current;
+ SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
+ result_pool, scratch_pool));
+ range->current = 0;
+ SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool));
+ filtered = (str->data[0] == verboten || str->data[0] == '\\');
+ }
+ while (filtered && ! *eof);
+
+ if (filtered)
+ {
+ /* EOF, return an empty string. The last line read was one that has
+ * to be discarded, so there is nothing to hand back. */
+ *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
+ }
+ else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
+ {
+ /* Shave off leading unidiff symbols. */
+ *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
+ }
+ else
+ {
+ /* Return the line as-is. */
+ *stringbuf = svn_stringbuf_dup(str, result_pool);
+ }
+
+ SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* Read the next line of HUNK's original text into *STRINGBUF.
+ * For a reversed patch the stored modified text is read instead and
+ * '-' lines are filtered out; otherwise '+' lines are filtered out. */
+svn_error_t *
+svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
+                                     svn_stringbuf_t **stringbuf,
+                                     const char **eol,
+                                     svn_boolean_t *eof,
+                                     apr_pool_t *result_pool,
+                                     apr_pool_t *scratch_pool)
+{
+  struct svn_diff__hunk_range *range;
+  char verboten;
+
+  if (hunk->patch->reverse)
+    {
+      range = &hunk->modified_text_range;
+      verboten = '-';
+    }
+  else
+    {
+      range = &hunk->original_text_range;
+      verboten = '+';
+    }
+
+  return svn_error_trace(
+           hunk_readline_original_or_modified(hunk->apr_file, range,
+                                              stringbuf, eol, eof, verboten,
+                                              result_pool, scratch_pool));
+}
+
+/* Read the next line of HUNK's modified text into *STRINGBUF.
+ * For a reversed patch the stored original text is read instead and
+ * '+' lines are filtered out; otherwise '-' lines are filtered out. */
+svn_error_t *
+svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
+                                     svn_stringbuf_t **stringbuf,
+                                     const char **eol,
+                                     svn_boolean_t *eof,
+                                     apr_pool_t *result_pool,
+                                     apr_pool_t *scratch_pool)
+{
+  struct svn_diff__hunk_range *range;
+  char verboten;
+
+  if (hunk->patch->reverse)
+    {
+      range = &hunk->original_text_range;
+      verboten = '+';
+    }
+  else
+    {
+      range = &hunk->modified_text_range;
+      verboten = '-';
+    }
+
+  return svn_error_trace(
+           hunk_readline_original_or_modified(hunk->apr_file, range,
+                                              stringbuf, eol, eof, verboten,
+                                              result_pool, scratch_pool));
+}
+
+/* Read the next line of HUNK's raw diff text into *STRINGBUF, advancing
+ * the hunk's diff-text cursor.  The seek position of the patch file is
+ * restored after the read.  For a reversed patch, hunk headers are
+ * rewritten with the two ranges swapped and a leading '+' or '-' on any
+ * other line is flipped. */
+svn_error_t *
+svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
+                                 svn_stringbuf_t **stringbuf,
+                                 const char **eol,
+                                 svn_boolean_t *eof,
+                                 apr_pool_t *result_pool,
+                                 apr_pool_t *scratch_pool)
+{
+  struct svn_diff__hunk_range *range = &hunk->diff_text_range;
+  svn_diff_hunk_t scratch_hunk;
+  svn_stringbuf_t *text;
+  apr_size_t limit;
+  apr_off_t saved_pos = 0;
+
+  if (range->current >= range->end)
+    {
+      /* We're past the range. Indicate that no bytes can be read. */
+      *eof = TRUE;
+      if (eol)
+        *eol = NULL;
+      *stringbuf = svn_stringbuf_create_empty(result_pool);
+      return SVN_NO_ERROR;
+    }
+
+  /* Remember where the file was, then read one line at the cursor. */
+  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &saved_pos,
+                           scratch_pool));
+  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &range->current,
+                           scratch_pool));
+  limit = range->end - range->current;
+  SVN_ERR(svn_io_file_readline(hunk->apr_file, &text, eol, eof, limit,
+                               result_pool, scratch_pool));
+
+  /* The new cursor is wherever the read left the file. */
+  range->current = 0;
+  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &range->current,
+                           scratch_pool));
+  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &saved_pos,
+                           scratch_pool));
+
+  if (hunk->patch->reverse)
+    {
+      if (parse_hunk_header(text->data, &scratch_hunk, "@@", scratch_pool))
+        {
+          /* Line is a hunk header, reverse it. */
+          text = svn_stringbuf_createf(result_pool,
+                                       "@@ -%lu,%lu +%lu,%lu @@",
+                                       hunk->modified_start,
+                                       hunk->modified_length,
+                                       hunk->original_start,
+                                       hunk->original_length);
+        }
+      else if (parse_hunk_header(text->data, &scratch_hunk, "##",
+                                 scratch_pool))
+        {
+          /* Line is a hunk header, reverse it. */
+          text = svn_stringbuf_createf(result_pool,
+                                       "## -%lu,%lu +%lu,%lu ##",
+                                       hunk->modified_start,
+                                       hunk->modified_length,
+                                       hunk->original_start,
+                                       hunk->original_length);
+        }
+      else if (text->data[0] == '+')
+        {
+          text->data[0] = '-';
+        }
+      else if (text->data[0] == '-')
+        {
+          text->data[0] = '+';
+        }
+    }
+
+  *stringbuf = text;
+
+  return SVN_NO_ERROR;
+}
+
+/* Set *PROP_NAME to the property name that follows INDICATOR at the
+ * start of HEADER, converted to UTF-8 and allocated in RESULT_POOL.
+ * If that text is not a valid property name as it stands, it is tried
+ * again with surrounding whitespace stripped.
+ * Set *PROP_NAME to NULL if the text is empty or no valid property name
+ * was found. */
+static svn_error_t *
+parse_prop_name(const char **prop_name, const char *header,
+                const char *indicator, apr_pool_t *result_pool)
+{
+  const char *raw_name = header + strlen(indicator);
+  svn_stringbuf_t *trimmed;
+
+  SVN_ERR(svn_utf_cstring_to_utf8(prop_name, raw_name, result_pool));
+
+  if ((*prop_name)[0] == '\0')
+    {
+      *prop_name = NULL;
+      return SVN_NO_ERROR;
+    }
+
+  if (svn_prop_name_is_valid(*prop_name))
+    return SVN_NO_ERROR;
+
+  /* Second chance: the name may only be padded with whitespace. */
+  trimmed = svn_stringbuf_create(*prop_name, result_pool);
+  svn_stringbuf_strip_whitespace(trimmed);
+  if (svn_prop_name_is_valid(trimmed->data))
+    *prop_name = trimmed->data;
+  else
+    *prop_name = NULL;
+
+  return SVN_NO_ERROR;
+}
+
+/* Return the next *HUNK from a PATCH in APR_FILE.
+ * If no hunk can be found, set *HUNK to NULL.
+ * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
+ * is the first belonging to a certain property, then PROP_NAME and
+ * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
+ * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
+ * treated as context lines. Allocate results in RESULT_POOL.
+ * Use SCRATCH_POOL for all other allocations.
+ *
+ * Scanning stops at the first line that cannot belong to the hunk, at a
+ * line that looks like the header of another patch, or at EOF.  Unless
+ * EOF was hit, APR_FILE is left positioned at the start of the line that
+ * stopped the scan.  A hunk whose text is cut short by EOF ends at EOF,
+ * whether or not IGNORE_WHITESPACE is set. */
+static svn_error_t *
+parse_next_hunk(svn_diff_hunk_t **hunk,
+                svn_boolean_t *is_property,
+                const char **prop_name,
+                svn_diff_operation_kind_t *prop_operation,
+                svn_patch_t *patch,
+                apr_file_t *apr_file,
+                svn_boolean_t ignore_whitespace,
+                apr_pool_t *result_pool,
+                apr_pool_t *scratch_pool)
+{
+  static const char * const minus = "--- ";
+  static const char * const text_atat = "@@";
+  static const char * const prop_atat = "##";
+  svn_stringbuf_t *line;
+  svn_boolean_t eof, in_hunk, hunk_seen;
+  apr_off_t pos, last_line;
+  /* START and END are only assigned on some paths through the loop
+   * below.  Start them at zero so that the "start < end" test after the
+   * loop never reads an indeterminate value. */
+  apr_off_t start = 0, end = 0;
+  apr_off_t original_end;
+  apr_off_t modified_end;
+  /* Remaining line counts; set for real once a hunk header has parsed. */
+  svn_linenum_t original_lines = 0;
+  svn_linenum_t modified_lines = 0;
+  svn_linenum_t leading_context;
+  svn_linenum_t trailing_context;
+  svn_boolean_t changed_line_seen;
+  enum {
+    noise_line,
+    original_line,
+    modified_line,
+    context_line
+  } last_line_type;
+  apr_pool_t *iterpool;
+
+  *prop_operation = svn_diff_op_unchanged;
+
+  /* We only set this if we have a property hunk header. */
+  *prop_name = NULL;
+  *is_property = FALSE;
+
+  if (apr_file_eof(apr_file) == APR_EOF)
+    {
+      /* No more hunks here. */
+      *hunk = NULL;
+      return SVN_NO_ERROR;
+    }
+
+  in_hunk = FALSE;
+  hunk_seen = FALSE;
+  leading_context = 0;
+  trailing_context = 0;
+  changed_line_seen = FALSE;
+  original_end = 0;
+  modified_end = 0;
+  *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
+
+  /* Get current seek position -- APR has no ftell() :( */
+  pos = 0;
+  SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool));
+
+  /* Start out assuming noise. */
+  last_line_type = noise_line;
+
+  iterpool = svn_pool_create(scratch_pool);
+  do
+    {
+
+      svn_pool_clear(iterpool);
+
+      /* Remember the current line's offset, and read the line. */
+      last_line = pos;
+      SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
+                                   iterpool, iterpool));
+
+      /* Update line offset for next iteration. */
+      pos = 0;
+      SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool));
+
+      /* Lines starting with a backslash indicate a missing EOL:
+       * "\ No newline at end of file" or "end of property". */
+      if (line->data[0] == '\\')
+        {
+          if (in_hunk)
+            {
+              char eolbuf[2];
+              apr_size_t len;
+              apr_off_t off;
+              apr_off_t hunk_text_end;
+
+              /* Comment terminates the hunk text and says the hunk text
+               * has no trailing EOL. Snip off trailing EOL which is part
+               * of the patch file but not part of the hunk text. */
+              off = last_line - 2;
+              SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
+              len = sizeof(eolbuf);
+              SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
+                                             &eof, iterpool));
+              if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
+                hunk_text_end = last_line - 2;
+              else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
+                hunk_text_end = last_line - 1;
+              else
+                hunk_text_end = last_line;
+
+              if (last_line_type == original_line && original_end == 0)
+                original_end = hunk_text_end;
+              else if (last_line_type == modified_line && modified_end == 0)
+                modified_end = hunk_text_end;
+              else if (last_line_type == context_line)
+                {
+                  if (original_end == 0)
+                    original_end = hunk_text_end;
+                  if (modified_end == 0)
+                    modified_end = hunk_text_end;
+                }
+
+              SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
+            }
+
+          continue;
+        }
+
+      if (in_hunk)
+        {
+          char c;
+          static const char add = '+';
+          static const char del = '-';
+
+          if (! hunk_seen)
+            {
+              /* We're reading the first line of the hunk, so the start
+               * of the line just read is the hunk text's byte offset. */
+              start = last_line;
+            }
+
+          c = line->data[0];
+          if (original_lines > 0 && modified_lines > 0 &&
+              ((c == ' ')
+               /* Tolerate chopped leading spaces on empty lines. */
+               || (! eof && line->len == 0)
+               /* Maybe tolerate chopped leading spaces on non-empty lines.
+                * The length test keeps the empty line returned at EOF from
+                * being counted as context; it must reach the final branch
+                * below so that END gets set. */
+               || (ignore_whitespace && line->len > 0
+                   && c != del && c != add)))
+            {
+              /* It's a "context" line in the hunk. */
+              hunk_seen = TRUE;
+              original_lines--;
+              modified_lines--;
+              if (changed_line_seen)
+                trailing_context++;
+              else
+                leading_context++;
+              last_line_type = context_line;
+            }
+          else if (original_lines > 0 && c == del)
+            {
+              /* It's a "deleted" line in the hunk. */
+              hunk_seen = TRUE;
+              changed_line_seen = TRUE;
+
+              /* A hunk may have context in the middle. We only want
+                 trailing lines of context. */
+              if (trailing_context > 0)
+                trailing_context = 0;
+
+              original_lines--;
+              last_line_type = original_line;
+            }
+          else if (modified_lines > 0 && c == add)
+            {
+              /* It's an "added" line in the hunk. */
+              hunk_seen = TRUE;
+              changed_line_seen = TRUE;
+
+              /* A hunk may have context in the middle. We only want
+                 trailing lines of context. */
+              if (trailing_context > 0)
+                trailing_context = 0;
+
+              modified_lines--;
+              last_line_type = modified_line;
+            }
+          else
+            {
+              if (eof)
+                {
+                  /* The hunk ends at EOF. */
+                  end = pos;
+                }
+              else
+                {
+                  /* The start of the current line marks the first byte
+                   * after the hunk text. */
+                  end = last_line;
+                }
+
+              if (original_end == 0)
+                original_end = end;
+              if (modified_end == 0)
+                modified_end = end;
+              break; /* Hunk was empty or has been read. */
+            }
+        }
+      else
+        {
+          if (starts_with(line->data, text_atat))
+            {
+              /* Looks like we have a hunk header, try to rip it apart. */
+              in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
+                                          iterpool);
+              if (in_hunk)
+                {
+                  original_lines = (*hunk)->original_length;
+                  modified_lines = (*hunk)->modified_length;
+                  *is_property = FALSE;
+                }
+            }
+          else if (starts_with(line->data, prop_atat))
+            {
+              /* Looks like we have a property hunk header, try to rip it
+               * apart. */
+              in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
+                                          iterpool);
+              if (in_hunk)
+                {
+                  original_lines = (*hunk)->original_length;
+                  modified_lines = (*hunk)->modified_length;
+                  *is_property = TRUE;
+                }
+            }
+          else if (starts_with(line->data, "Added: "))
+            {
+              SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
+                                      result_pool));
+              if (*prop_name)
+                *prop_operation = svn_diff_op_added;
+            }
+          else if (starts_with(line->data, "Deleted: "))
+            {
+              SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
+                                      result_pool));
+              if (*prop_name)
+                *prop_operation = svn_diff_op_deleted;
+            }
+          else if (starts_with(line->data, "Modified: "))
+            {
+              SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
+                                      result_pool));
+              if (*prop_name)
+                *prop_operation = svn_diff_op_modified;
+            }
+          else if (starts_with(line->data, minus)
+                   || starts_with(line->data, "diff --git "))
+            /* This could be a header of another patch. Bail out. */
+            break;
+        }
+    }
+  /* Check for the line length since a file may not have a newline at the
+   * end and we depend upon the last line to be an empty one. */
+  while (! eof || line->len > 0);
+  svn_pool_destroy(iterpool);
+
+  if (! eof)
+    /* Rewind to the start of the line just read, so subsequent calls
+     * to this function or svn_diff_parse_next_patch() don't end
+     * up skipping the line -- it may contain a patch or hunk header. */
+    SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
+
+  if (hunk_seen && start < end)
+    {
+      (*hunk)->patch = patch;
+      (*hunk)->apr_file = apr_file;
+      (*hunk)->leading_context = leading_context;
+      (*hunk)->trailing_context = trailing_context;
+      (*hunk)->diff_text_range.start = start;
+      (*hunk)->diff_text_range.current = start;
+      (*hunk)->diff_text_range.end = end;
+      (*hunk)->original_text_range.start = start;
+      (*hunk)->original_text_range.current = start;
+      (*hunk)->original_text_range.end = original_end;
+      (*hunk)->modified_text_range.start = start;
+      (*hunk)->modified_text_range.current = start;
+      (*hunk)->modified_text_range.end = modified_end;
+    }
+  else
+    /* Something went wrong, just discard the result. */
+    *hunk = NULL;
+
+  return SVN_NO_ERROR;
+}
+
+/* qsort() comparator used to order hunks after parsing.
+ * A and B each point to an array element, i.e. to a 'svn_diff_hunk_t *'.
+ * Hunks are ordered by ascending original line offset. */
+static int
+compare_hunks(const void *a, const void *b)
+{
+  const svn_diff_hunk_t *const *lhs = a;
+  const svn_diff_hunk_t *const *rhs = b;
+
+  if ((*lhs)->original_start == (*rhs)->original_start)
+    return 0;
+
+  return ((*lhs)->original_start < (*rhs)->original_start) ? -1 : 1;
+}
+
+/* Possible states of the diff header parser.
+ * The comment next to each state shows the header line that leads to it. */
+enum parse_state
+{
+ state_start, /* initial */
+ state_git_diff_seen, /* diff --git */
+ state_git_tree_seen, /* a tree operation, rather than content change */
+ state_git_minus_seen, /* --- /dev/null; or --- a/ */
+ state_git_plus_seen, /* +++ /dev/null; or +++ b/ */
+ state_move_from_seen, /* rename from foo.c */
+ state_copy_from_seen, /* copy from foo.c */
+ state_minus_seen, /* --- foo.c */
+ state_unidiff_found, /* valid start of a regular unidiff header */
+ state_git_header_found /* valid start of a --git diff header */
+};
+
+/* Data type describing a valid state transition of the parser. */
+struct transition
+{
+ const char *expected_input; /* prefix the current line must start with */
+ enum parse_state required_state; /* state the parser must be in */
+
+ /* A callback called upon each parser state transition.
+ * It receives the matching line as INPUT, stores what it extracts in
+ * PATCH and reports the state to continue with in *NEW_STATE. */
+ svn_error_t *(*fn)(enum parse_state *new_state, char *input,
+ svn_patch_t *patch, apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool);
+};
+
+/* Set *FILE_NAME to the path given in LINE, converted to UTF-8 and
+ * canonicalized as a dirent.  The result is allocated in RESULT_POOL;
+ * intermediate strings are allocated in SCRATCH_POOL. */
+static svn_error_t *
+grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
+              apr_pool_t *scratch_pool)
+{
+  const char *path;
+
+  /* TODO: Allow specifying the patch file's encoding.
+   * For now, we assume its encoding is native. */
+  /* ### This can fail if the filename cannot be represented in the current
+   * ### locale's encoding. */
+  SVN_ERR(svn_utf_cstring_to_utf8(&path, line, scratch_pool));
+
+  /* Canonicalize in scratch space, then hand the caller a durable copy. */
+  path = svn_dirent_canonicalize(path, scratch_pool);
+  *file_name = apr_pstrdup(result_pool, path);
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the '--- ' line of a regular unidiff.
+ * Stores the old filename in PATCH and moves on to state_minus_seen.
+ * LINE is modified in place: it is cut off at the first tab, if any. */
+static svn_error_t *
+diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
+           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *name = line + STRLEN_LITERAL("--- ");
+  char *cut;
+
+  /* A tab, if present, ends the filename; the rest of the line is
+   * of no interest. */
+  cut = strchr(line, '\t');
+  if (cut != NULL)
+    *cut = '\0';
+
+  SVN_ERR(grab_filename(&patch->old_filename, name,
+                        result_pool, scratch_pool));
+  *new_state = state_minus_seen;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the '+++ ' line of a regular unidiff.
+ * Stores the new filename in PATCH and moves on to state_unidiff_found.
+ * LINE is modified in place: it is cut off at the first tab, if any. */
+static svn_error_t *
+diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
+          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *name = line + STRLEN_LITERAL("+++ ");
+  char *cut;
+
+  /* A tab, if present, ends the filename; the rest of the line is
+   * of no interest. */
+  cut = strchr(line, '\t');
+  if (cut != NULL)
+    *cut = '\0';
+
+  SVN_ERR(grab_filename(&patch->new_filename, name,
+                        result_pool, scratch_pool));
+  *new_state = state_unidiff_found;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the first line of a git extended unidiff,
+ * which should look like 'diff --git a/path b/path'.
+ *
+ * If LINE lacks an " a/" marker followed by a " b/" marker, each with at
+ * least one character after it, the parser is sent back to state_start.
+ * Otherwise PATCH is provisionally marked as modified and the parser moves
+ * on to state_git_diff_seen.  If the old and the new path on the line are
+ * identical they are stored in PATCH as well; LINE is modified in place
+ * in that case. */
+static svn_error_t *
+git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
+          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *a_marker;
+  const char *b_marker = NULL;
+  const char *old_path;
+  const char *line_end;
+  const char *search_from;
+  char *split;
+
+  /* ### Add handling of escaped paths
+   * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
+   *
+   * TAB, LF, double quote and backslash characters in pathnames are
+   * represented as \t, \n, \" and \\, respectively. If there is need for
+   * such substitution then the whole pathname is put in double quotes.
+   */
+
+  /* Validate the overall shape of the line.  Any deviation resets the
+   * parser to its start state. */
+  a_marker = strstr(line, " a/");
+  if (a_marker != NULL && a_marker[3] != '\0')
+    b_marker = strstr(a_marker, " b/");
+
+  if (b_marker == NULL || b_marker[3] == '\0')
+    {
+      *new_state = state_start;
+      return SVN_NO_ERROR;
+    }
+
+  /* By now, we know that we have a line of the form 'diff --git a/.+ b/.+'.
+   * We only need the filenames when we have deleted or added empty
+   * files. In those cases the old path and the new path are identical on
+   * the 'diff --git' line. For all other cases we fetch the filenames from
+   * other header lines. */
+  old_path = line + STRLEN_LITERAL("diff --git a/");
+  line_end = line + strlen(line);
+  search_from = old_path;
+
+  /* Try every " b/" marker as the possible split between the two paths. */
+  while ((split = strstr(search_from, " b/")) != NULL)
+    {
+      const char *new_path = split + STRLEN_LITERAL(" b/");
+      ptrdiff_t old_len = split - old_path;
+      ptrdiff_t new_len = line_end - new_path;
+
+      /* No path after the marker. */
+      if (*new_path == '\0')
+        break;
+
+      /* Are the paths before and after this marker the same? */
+      if (old_len == new_len && strncmp(old_path, new_path, old_len) == 0)
+        {
+          *split = '\0';
+          SVN_ERR(grab_filename(&patch->old_filename, old_path,
+                                result_pool, scratch_pool));
+          SVN_ERR(grab_filename(&patch->new_filename, new_path,
+                                result_pool, scratch_pool));
+          break;
+        }
+
+      search_from = new_path;
+    }
+
+  /* We assume that the path is only modified until we've found a 'tree'
+   * header */
+  patch->operation = svn_diff_op_modified;
+  *new_state = state_git_diff_seen;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the '--- ' line of a git extended unidiff.
+ * Stores either "/dev/null" or the path following '--- a/' as the old
+ * filename in PATCH and moves on to state_git_minus_seen.
+ * LINE is modified in place: it is cut off at the first tab, if any. */
+static svn_error_t *
+git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
+          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *name;
+  char *cut = strchr(line, '\t');
+
+  /* A tab, if present, ends the filename; the rest of the line is
+   * of no interest. */
+  if (cut != NULL)
+    *cut = '\0';
+
+  name = starts_with(line, "--- /dev/null")
+           ? "/dev/null"
+           : line + STRLEN_LITERAL("--- a/");
+
+  SVN_ERR(grab_filename(&patch->old_filename, name,
+                        result_pool, scratch_pool));
+  *new_state = state_git_minus_seen;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the '+++ ' line of a git extended unidiff.
+ * Stores either "/dev/null" or the path following '+++ b/' as the new
+ * filename in PATCH and moves on to state_git_header_found.
+ * LINE is modified in place: it is cut off at the first tab, if any. */
+static svn_error_t *
+git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
+         apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *name;
+  char *cut = strchr(line, '\t');
+
+  /* A tab, if present, ends the filename; the rest of the line is
+   * of no interest. */
+  if (cut != NULL)
+    *cut = '\0';
+
+  name = starts_with(line, "+++ /dev/null")
+           ? "/dev/null"
+           : line + STRLEN_LITERAL("+++ b/");
+
+  SVN_ERR(grab_filename(&patch->new_filename, name,
+                        result_pool, scratch_pool));
+  *new_state = state_git_header_found;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the 'rename from ' line of a git extended
+ * unidiff.  Stores the path as the old filename in PATCH and moves on to
+ * state_move_from_seen. */
+static svn_error_t *
+git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
+              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *source = line + STRLEN_LITERAL("rename from ");
+
+  SVN_ERR(grab_filename(&patch->old_filename, source,
+                        result_pool, scratch_pool));
+  *new_state = state_move_from_seen;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the 'rename to ' line of a git extended
+ * unidiff.  Stores the path as the new filename in PATCH, marks the patch
+ * as a move and moves on to state_git_tree_seen. */
+static svn_error_t *
+git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
+            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *target = line + STRLEN_LITERAL("rename to ");
+
+  SVN_ERR(grab_filename(&patch->new_filename, target,
+                        result_pool, scratch_pool));
+
+  *new_state = state_git_tree_seen;
+  patch->operation = svn_diff_op_moved;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the 'copy from ' line of a git extended
+ * unidiff.  Stores the path as the old filename in PATCH and moves on to
+ * state_copy_from_seen. */
+static svn_error_t *
+git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
+              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *source = line + STRLEN_LITERAL("copy from ");
+
+  SVN_ERR(grab_filename(&patch->old_filename, source,
+                        result_pool, scratch_pool));
+  *new_state = state_copy_from_seen;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the 'copy to ' line of a git extended unidiff.
+ * Stores the path as the new filename in PATCH, marks the patch as a copy
+ * and moves on to state_git_tree_seen. */
+static svn_error_t *
+git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
+            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  const char *target = line + STRLEN_LITERAL("copy to ");
+
+  SVN_ERR(grab_filename(&patch->new_filename, target,
+                        result_pool, scratch_pool));
+
+  *new_state = state_git_tree_seen;
+  patch->operation = svn_diff_op_copied;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the 'new file ' line of a git extended unidiff.
+ * Marks PATCH as an addition and moves on to state_git_tree_seen.
+ * LINE and the pools are not used: the filename was already retrieved
+ * from the 'diff --git' header. */
+static svn_error_t *
+git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
+             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  *new_state = state_git_tree_seen;
+  patch->operation = svn_diff_op_added;
+
+  return SVN_NO_ERROR;
+}
+
+/* Transition callback for the 'deleted file ' line of a git extended
+ * unidiff.  Marks PATCH as a deletion and moves on to state_git_tree_seen.
+ * LINE and the pools are not used: the filename was already retrieved
+ * from the 'diff --git' header. */
+static svn_error_t *
+git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
+                 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  *new_state = state_git_tree_seen;
+  patch->operation = svn_diff_op_deleted;
+
+  return SVN_NO_ERROR;
+}
+
+/* File HUNK under the property PROP_NAME in PATCH->PROP_PATCHES.
+ * The first hunk seen for a property creates its entry, recording
+ * OPERATION; later hunks are appended to the existing entry and their
+ * OPERATION is not looked at.  New entries are allocated in RESULT_POOL. */
+static svn_error_t *
+add_property_hunk(svn_patch_t *patch, const char *prop_name,
+                  svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
+                  apr_pool_t *result_pool)
+{
+  svn_prop_patch_t *entry = svn_hash_gets(patch->prop_patches, prop_name);
+
+  if (entry == NULL)
+    {
+      entry = apr_palloc(result_pool, sizeof(*entry));
+      entry->hunks = apr_array_make(result_pool, 1,
+                                    sizeof(svn_diff_hunk_t *));
+      entry->operation = operation;
+      entry->name = prop_name;
+
+      svn_hash_sets(patch->prop_patches, prop_name, entry);
+    }
+
+  APR_ARRAY_PUSH(entry->hunks, svn_diff_hunk_t *) = hunk;
+
+  return SVN_NO_ERROR;
+}
+
+/* An open patch file together with the position at which parsing of the
+ * next patch will resume. */
+struct svn_patch_file_t
+{
+ /* The APR file handle to the patch file. */
+ apr_file_t *apr_file;
+
+ /* The file offset at which the next patch is expected. */
+ apr_off_t next_patch_offset;
+};
+
+/* Open the patch file at LOCAL_ABSPATH for buffered reading and return a
+ * handle for it in *PATCH_FILE, positioned at the start of the file.
+ * The handle and the file are allocated in RESULT_POOL.
+ * *PATCH_FILE is left untouched if the file cannot be opened. */
+svn_error_t *
+svn_diff_open_patch_file(svn_patch_file_t **patch_file,
+                         const char *local_abspath,
+                         apr_pool_t *result_pool)
+{
+  svn_patch_file_t *handle = apr_palloc(result_pool, sizeof(*handle));
+
+  handle->next_patch_offset = 0;
+  SVN_ERR(svn_io_file_open(&handle->apr_file, local_abspath,
+                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
+                           result_pool));
+
+  *patch_file = handle;
+
+  return SVN_NO_ERROR;
+}
+
+/* Parse hunks from APR_FILE and store them in PATCH.
+ * Text hunks are appended to PATCH->HUNKS; property hunks are filed under
+ * their property name in PATCH->PROP_PATCHES.
+ * Parsing stops if no valid next hunk can be found.
+ * If IGNORE_WHITESPACE is TRUE, lines without
+ * leading spaces will be treated as context lines.
+ * Allocate results in RESULT_POOL.
+ * Use SCRATCH_POOL for temporary allocations. */
+static svn_error_t *
+parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
+            svn_boolean_t ignore_whitespace,
+            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
+{
+  svn_diff_hunk_t *hunk;
+  svn_boolean_t is_property;
+  const char *last_prop_name = NULL;
+  const char *prop_name;
+  svn_diff_operation_kind_t prop_operation;
+  apr_pool_t *iterpool;
+
+  patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
+  patch->prop_patches = apr_hash_make(result_pool);
+  iterpool = svn_pool_create(scratch_pool);
+  do
+    {
+      svn_pool_clear(iterpool);
+
+      SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
+                              patch, apr_file, ignore_whitespace, result_pool,
+                              iterpool));
+
+      if (hunk && is_property)
+        {
+          /* A property hunk reported without a name of its own belongs to
+           * the most recently named property. */
+          if (prop_name)
+            last_prop_name = prop_name;
+          else
+            prop_name = last_prop_name;
+
+          /* If there is still no name (a property hunk header that was
+           * not preceded by any property name line), the hunk cannot be
+           * filed anywhere: the name is used as a string key into
+           * PATCH->PROP_PATCHES and must not be NULL.  Skip such a hunk
+           * rather than passing a NULL key on. */
+          if (prop_name)
+            {
+              SVN_ERR(add_property_hunk(patch, prop_name, hunk,
+                                        prop_operation, result_pool));
+            }
+        }
+      else if (hunk)
+        {
+          APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
+
+          /* A text hunk ends the run of hunks of the last property. */
+          last_prop_name = NULL;
+        }
+
+    }
+  while (hunk);
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* State machine for the diff header parser.
+ * A line triggers the first entry whose expected input is a prefix of the
+ * line and whose required state is the parser's current state.
+ * The table is never modified, hence const.
+ *
+ * Expected Input Required state Function to call */
+static const struct transition transitions[] =
+{
+  {"--- ",          state_start,            diff_minus},
+  {"+++ ",          state_minus_seen,       diff_plus},
+  {"diff --git",    state_start,            git_start},
+  {"--- a/",        state_git_diff_seen,    git_minus},
+  {"--- a/",        state_git_tree_seen,    git_minus},
+  {"--- /dev/null", state_git_tree_seen,    git_minus},
+  {"+++ b/",        state_git_minus_seen,   git_plus},
+  {"+++ /dev/null", state_git_minus_seen,   git_plus},
+  {"rename from ",  state_git_diff_seen,    git_move_from},
+  {"rename to ",    state_move_from_seen,   git_move_to},
+  {"copy from ",    state_git_diff_seen,    git_copy_from},
+  {"copy to ",      state_copy_from_seen,   git_copy_to},
+  {"new file ",     state_git_diff_seen,    git_new_file},
+  {"deleted file ", state_git_diff_seen,    git_deleted_file},
+};
+
+/* Read the next patch from PATCH_FILE and return it in *PATCH.
+ *
+ * *PATCH is set to NULL if the file handle already reports end-of-file,
+ * or if no header providing both an old and a new filename was found.
+ * Scanning starts at PATCH_FILE->next_patch_offset; that offset is updated
+ * before returning so that the next call continues where this one stopped.
+ *
+ * If REVERSE is TRUE the old and new filenames are swapped, and the flag
+ * is recorded in the patch. IGNORE_WHITESPACE is passed on to the hunk
+ * parser. The patch is allocated in RESULT_POOL; SCRATCH_POOL is used for
+ * temporary allocations. */
+svn_error_t *
+svn_diff_parse_next_patch(svn_patch_t **patch,
+ svn_patch_file_t *patch_file,
+ svn_boolean_t reverse,
+ svn_boolean_t ignore_whitespace,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ apr_off_t pos, last_line;
+ svn_boolean_t eof;
+ svn_boolean_t line_after_tree_header_read = FALSE;
+ apr_pool_t *iterpool;
+ enum parse_state state = state_start;
+
+ if (apr_file_eof(patch_file->apr_file) == APR_EOF)
+ {
+ /* No more patches here. */
+ *patch = NULL;
+ return SVN_NO_ERROR;
+ }
+
+ *patch = apr_pcalloc(result_pool, sizeof(**patch));
+
+ pos = patch_file->next_patch_offset;
+ SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
+
+ iterpool = svn_pool_create(scratch_pool);
+ do
+ {
+ svn_stringbuf_t *line;
+ svn_boolean_t valid_header_line = FALSE;
+ int i;
+
+ svn_pool_clear(iterpool);
+
+ /* Remember the current line's offset, and read the line. */
+ last_line = pos;
+ SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
+ APR_SIZE_MAX, iterpool, iterpool));
+
+ if (! eof)
+ {
+ /* Update line offset for next iteration.
+ * Seeking by 0 from the current position only queries the offset. */
+ pos = 0;
+ SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos,
+ iterpool));
+ }
+
+ /* Run the state machine.
+ * Only the first matching transition is applied to a line. */
+ for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
+ {
+ if (starts_with(line->data, transitions[i].expected_input)
+ && state == transitions[i].required_state)
+ {
+ SVN_ERR(transitions[i].fn(&state, line->data, *patch,
+ result_pool, iterpool));
+ valid_header_line = TRUE;
+ break;
+ }
+ }
+
+ if (state == state_unidiff_found || state == state_git_header_found)
+ {
+ /* We have a valid diff header, yay! */
+ break;
+ }
+ else if (state == state_git_tree_seen && line_after_tree_header_read)
+ {
+ /* git patches can contain an index line after the file mode line */
+ if (!starts_with(line->data, "index "))
+ {
+ /* We have a valid diff header for a patch with only tree changes.
+ * Rewind to the start of the line just read, so subsequent calls
+ * to this function don't end up skipping the line -- it may
+ * contain a patch. */
+ SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
+ scratch_pool));
+ break;
+ }
+ }
+ else if (state == state_git_tree_seen)
+ {
+ line_after_tree_header_read = TRUE;
+ }
+ else if (! valid_header_line && state != state_start
+ && !starts_with(line->data, "index "))
+ {
+ /* We've encountered an invalid diff header.
+ *
+ * Rewind to the start of the line just read - it may be a new
+ * header that begins there. */
+ SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
+ scratch_pool));
+ state = state_start;
+ }
+
+ }
+ while (! eof);
+
+ (*patch)->reverse = reverse;
+ if (reverse)
+ {
+ const char *temp;
+ temp = (*patch)->old_filename;
+ (*patch)->old_filename = (*patch)->new_filename;
+ (*patch)->new_filename = temp;
+ }
+
+ if ((*patch)->old_filename == NULL || (*patch)->new_filename == NULL)
+ {
+ /* Something went wrong, just discard the result. */
+ *patch = NULL;
+ }
+ else
+ SVN_ERR(parse_hunks(*patch, patch_file->apr_file, ignore_whitespace,
+ result_pool, iterpool));
+
+ svn_pool_destroy(iterpool);
+
+ /* Record where the file is positioned now; the next call starts there. */
+ patch_file->next_patch_offset = 0;
+ SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR,
+ &patch_file->next_patch_offset, scratch_pool));
+
+ if (*patch)
+ {
+ /* Usually, hunks appear in the patch sorted by their original line
+ * offset. But just in case they weren't parsed in this order for
+ * some reason, we sort them so that our caller can assume that hunks
+ * are sorted as if parsed from a usual patch. */
+ qsort((*patch)->hunks->elts, (*patch)->hunks->nelts,
+ (*patch)->hunks->elt_size, compare_hunks);
+ }
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_diff_close_patch_file(svn_patch_file_t *patch_file,
+ apr_pool_t *scratch_pool)
+{
+ return svn_error_trace(svn_io_file_close(patch_file->apr_file,
+ scratch_pool));
+}
diff --git a/subversion/libsvn_diff/token.c b/subversion/libsvn_diff/token.c
new file mode 100644
index 000000000000..6388d9f070e8
--- /dev/null
+++ b/subversion/libsvn_diff/token.c
@@ -0,0 +1,198 @@
+/*
+ * token.c : routines for doing diffs
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#include <apr.h>
+#include <apr_pools.h>
+#include <apr_general.h>
+
+#include "svn_error.h"
+#include "svn_diff.h"
+#include "svn_types.h"
+
+#include "diff.h"
+
+
+/*
+ * Prime number to use as the size of the hash table. This number was
+ * not selected by testing of any kind and may need tweaking.
+ * It is the number of root slots in a token tree; a token's hash value
+ * modulo this number selects its slot.
+ */
+#define SVN_DIFF__HASH_SIZE 127
+
+/* A node in one of the binary trees of tokens hanging off a token tree's
+ * root slots. */
+struct svn_diff__node_t
+{
+ svn_diff__node_t *parent; /* node this one was attached to, or NULL */
+ svn_diff__node_t *left; /* child taken when the comparison is positive */
+ svn_diff__node_t *right; /* child taken when the comparison is negative */
+
+ apr_uint32_t hash; /* hash value the node was created with */
+ svn_diff__token_index_t index; /* tree's node count at creation time */
+ void *token; /* most recently inserted token equal to this node */
+};
+
+/* A collection of tokens, stored as one binary tree per hash slot. */
+struct svn_diff__tree_t
+{
+ svn_diff__node_t *root[SVN_DIFF__HASH_SIZE]; /* tree roots, by hash slot */
+ apr_pool_t *pool; /* pool that new nodes are allocated in */
+ svn_diff__token_index_t node_count; /* number of nodes created so far */
+};
+
+
+/*
+ * Return the number of token nodes created in TREE so far.
+ */
+svn_diff__token_index_t
+svn_diff__get_node_count(svn_diff__tree_t *tree)
+{
+  svn_diff__token_index_t count = tree->node_count;
+
+  return count;
+}
+
+/*
+ * Support functions to build a tree of token positions
+ */
+
+/* Allocate an empty token tree in POOL and return it in *TREE.
+ * Nodes added to the tree later are allocated in POOL as well. */
+void
+svn_diff__tree_create(svn_diff__tree_t **tree, apr_pool_t *pool)
+{
+  /* The allocation is zero-filled, so every root slot starts out NULL. */
+  svn_diff__tree_t *fresh = apr_pcalloc(pool, sizeof(*fresh));
+
+  fresh->node_count = 0;
+  fresh->pool = pool;
+
+  *tree = fresh;
+}
+
+
+/* Find the node for TOKEN, whose hash value is HASH, in TREE and return it
+ * in *NODE; create the node if there is none yet.
+ *
+ * Nodes are first compared by hash value and, if those are equal, with
+ * VTABLE->token_compare (passing DIFF_BATON).  If an equal node exists,
+ * its previous token is handed to VTABLE->token_discard (when provided)
+ * and replaced by TOKEN.  TOKEN must not be NULL. */
+static svn_error_t *
+tree_insert_token(svn_diff__node_t **node, svn_diff__tree_t *tree,
+                  void *diff_baton,
+                  const svn_diff_fns2_t *vtable,
+                  apr_uint32_t hash, void *token)
+{
+  svn_diff__node_t **slot;
+  svn_diff__node_t *above = NULL;
+  svn_diff__node_t *fresh;
+  int cmp;
+
+  SVN_ERR_ASSERT(token);
+
+  /* Walk down from the root slot selected by the hash value until we
+   * either meet an equal node or fall off the tree. */
+  slot = &tree->root[hash % SVN_DIFF__HASH_SIZE];
+  while (*slot != NULL)
+    {
+      above = *slot;
+
+      cmp = hash - above->hash;
+      if (cmp == 0)
+        SVN_ERR(vtable->token_compare(diff_baton, above->token, token, &cmp));
+
+      if (cmp == 0)
+        {
+          /* Discard the previous token. This helps in cases where
+           * only recently read tokens are still in memory.
+           */
+          if (vtable->token_discard != NULL)
+            vtable->token_discard(diff_baton, above->token);
+
+          above->token = token;
+          *node = above;
+
+          return SVN_NO_ERROR;
+        }
+
+      slot = (cmp > 0) ? &above->left : &above->right;
+    }
+
+  /* Not found: hang a new leaf into the empty slot we ended up at. */
+  fresh = apr_palloc(tree->pool, sizeof(*fresh));
+  fresh->hash = hash;
+  fresh->token = token;
+  fresh->index = tree->node_count++;
+  fresh->parent = above;
+  fresh->left = NULL;
+  fresh->right = NULL;
+
+  *slot = fresh;
+  *node = fresh;
+
+  return SVN_NO_ERROR;
+}
+
+
+/*
+ * Get all tokens from a datasource. Return the
+ * last item in the (circular) list.
+ *
+ * Every token read from DATASOURCE through VTABLE (with DIFF_BATON) is
+ * inserted into TREE, and a position recording the token's index in TREE
+ * and its offset is allocated in POOL.  Offsets count up from
+ * PREFIX_LINES + 1.  The datasource is closed afterwards.
+ * *POSITION_LIST is set to NULL if the datasource yields no tokens.
+ */
+svn_error_t *
+svn_diff__get_tokens(svn_diff__position_t **position_list,
+                     svn_diff__tree_t *tree,
+                     void *diff_baton,
+                     const svn_diff_fns2_t *vtable,
+                     svn_diff_datasource_e datasource,
+                     apr_off_t prefix_lines,
+                     apr_pool_t *pool)
+{
+  /* Initialized because, with no tokens at all, the statement closing the
+   * circle below reads this variable before anything was stored in it. */
+  svn_diff__position_t *start_position = NULL;
+  svn_diff__position_t *position = NULL;
+  svn_diff__position_t **position_ref;
+  svn_diff__node_t *node;
+  void *token;
+  apr_off_t offset;
+  apr_uint32_t hash;
+
+  *position_list = NULL;
+
+  position_ref = &start_position;
+  offset = prefix_lines;
+  hash = 0; /* The callback fn doesn't need to touch it per se */
+  while (1)
+    {
+      SVN_ERR(vtable->datasource_get_next_token(&hash, &token,
+                                                diff_baton, datasource));
+      if (token == NULL)
+        break;
+
+      offset++;
+      SVN_ERR(tree_insert_token(&node, tree, diff_baton, vtable, hash, token));
+
+      /* Create a new position */
+      position = apr_palloc(pool, sizeof(*position));
+      position->next = NULL;
+      position->token_index = node->index;
+      position->offset = offset;
+
+      /* Append it; the first position ends up in START_POSITION. */
+      *position_ref = position;
+      position_ref = &position->next;
+    }
+
+  /* Close the circle: the last position points back at the first one. */
+  *position_ref = start_position;
+
+  SVN_ERR(vtable->datasource_close(diff_baton, datasource));
+
+  *position_list = position;
+
+  return SVN_NO_ERROR;
+}
diff --git a/subversion/libsvn_diff/util.c b/subversion/libsvn_diff/util.c
new file mode 100644
index 000000000000..9e1f41176e54
--- /dev/null
+++ b/subversion/libsvn_diff/util.c
@@ -0,0 +1,591 @@
+/*
+ * util.c : routines for doing diffs
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+
+#include <apr.h>
+#include <apr_general.h>
+
+#include "svn_hash.h"
+#include "svn_pools.h"
+#include "svn_dirent_uri.h"
+#include "svn_props.h"
+#include "svn_mergeinfo.h"
+#include "svn_error.h"
+#include "svn_diff.h"
+#include "svn_types.h"
+#include "svn_ctype.h"
+#include "svn_utf.h"
+#include "svn_version.h"
+
+#include "private/svn_diff_private.h"
+#include "diff.h"
+
+#include "svn_private_config.h"
+
+
+/* Return TRUE if any element of the list starting at DIFF is a conflict,
+ * FALSE otherwise (including for an empty list). */
+svn_boolean_t
+svn_diff_contains_conflicts(svn_diff_t *diff)
+{
+  svn_diff_t *chunk;
+
+  for (chunk = diff; chunk != NULL; chunk = chunk->next)
+    if (chunk->type == svn_diff__type_conflict)
+      return TRUE;
+
+  return FALSE;
+}
+
+/* Return TRUE if any element of the list starting at DIFF is something
+ * other than a common section, FALSE otherwise (including for an empty
+ * list). */
+svn_boolean_t
+svn_diff_contains_diffs(svn_diff_t *diff)
+{
+  svn_diff_t *chunk;
+
+  for (chunk = diff; chunk != NULL; chunk = chunk->next)
+    if (chunk->type != svn_diff__type_common)
+      return TRUE;
+
+  return FALSE;
+}
+
+/* Walk the list starting at DIFF and report each element to the callback
+ * in VTABLE that corresponds to its type, passing OUTPUT_BATON along.
+ * Conflicts go to VTABLE->output_conflict, which additionally receives the
+ * resolved diff and is skipped when NULL.  Elements of an unknown type are
+ * ignored.  The first error returned by a callback ends the walk. */
+svn_error_t *
+svn_diff_output(svn_diff_t *diff,
+                void *output_baton,
+                const svn_diff_output_fns_t *vtable)
+{
+  for (; diff != NULL; diff = diff->next)
+    {
+      svn_error_t *(*emit)(void *,
+                           apr_off_t, apr_off_t,
+                           apr_off_t, apr_off_t,
+                           apr_off_t, apr_off_t) = NULL;
+
+      switch (diff->type)
+        {
+        case svn_diff__type_common:
+          emit = vtable->output_common;
+          break;
+
+        case svn_diff__type_diff_common:
+          emit = vtable->output_diff_common;
+          break;
+
+        case svn_diff__type_diff_modified:
+          emit = vtable->output_diff_modified;
+          break;
+
+        case svn_diff__type_diff_latest:
+          emit = vtable->output_diff_latest;
+          break;
+
+        case svn_diff__type_conflict:
+          /* The conflict callback has its own signature, so it is called
+           * right here instead of through EMIT. */
+          if (vtable->output_conflict != NULL)
+            {
+              SVN_ERR(vtable->output_conflict(output_baton,
+                               diff->original_start, diff->original_length,
+                               diff->modified_start, diff->modified_length,
+                               diff->latest_start, diff->latest_length,
+                               diff->resolved_diff));
+            }
+          break;
+
+        default:
+          break;
+        }
+
+      if (emit == NULL)
+        continue;
+
+      SVN_ERR(emit(output_baton,
+                   diff->original_start, diff->original_length,
+                   diff->modified_start, diff->modified_length,
+                   diff->latest_start, diff->latest_length));
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Normalize the *LENGTHP bytes at BUF according to OPTS.
+ *
+ * If OPTS asks for neither whitespace nor eol-style normalization, *TGT is
+ * simply pointed at BUF and nothing else is changed.
+ *
+ * Otherwise, on return *TGT points to the normalized data and *LENGTHP
+ * holds its length. *TGT then either points into BUF (when the result is
+ * a single block of BUF that needed no copying) or is left at its value on
+ * entry, with the normalized data written there. *STATEP carries the
+ * normalization state from one call to the next: it is read on entry and
+ * updated on return. */
+void
+svn_diff__normalize_buffer(char **tgt,
+ apr_off_t *lengthp,
+ svn_diff__normalize_state_t *statep,
+ const char *buf,
+ const svn_diff_file_options_t *opts)
+{
+ /* Variables for looping through BUF */
+ const char *curp, *endp;
+
+ /* Variable to record normalizing state */
+ svn_diff__normalize_state_t state = *statep;
+
+ /* Variables to track what needs copying into the target buffer */
+ const char *start = buf;
+ apr_size_t include_len = 0;
+ svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */
+
+ /* Variable to record the state of the target buffer */
+ char *tgt_newend = *tgt;
+
+ /* If this is a noop, then just get out of here. */
+ if (! opts->ignore_space && ! opts->ignore_eol_style)
+ {
+ *tgt = (char *)buf;
+ return;
+ }
+
+
+ /* It only took me forever to get this routine right,
+ so here my thoughts go:
+
+ Below, we loop through the data, doing 2 things:
+
+ - Normalizing
+ - Copying other data
+
+ The routine tries its hardest *not* to copy data, but instead
+ returning a pointer into already normalized existing data.
+
+ To this end, a block of 'other data' shouldn't be copied when found,
+ but only as soon as it can't be returned in-place.
+
+ On a character level, there are 3 possible operations:
+
+ - Skip the character (don't include in the normalized data)
+ - Include the character (do include in the normalized data)
+ - Include as another character
+ This is essentially the same as skipping the current character
+ and inserting a given character in the output data.
+
+ The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
+ handle the character based operations. The macros themselves
+ collect character level data into blocks.
+
+ At all times, the START, INCLUDE_LEN and CURP variables designate
+ an included and a skipped block like this:
+
+ [ start, start + include_len ) [ start + include_len, curp )
+ INCLUDED EXCLUDED
+
+ When the routine flips from skipping to including, the last
+ included block has to be flushed to the output buffer.
+ */
+
+ /* Going from including to skipping; only schedules the current
+ included section for flushing.
+ Also, simply chop off the character if it's the first in the buffer,
+ so we can possibly just return the remainder of the buffer */
+#define SKIP \
+ do { \
+ if (start == curp) \
+ ++start; \
+ last_skipped = TRUE; \
+ } while (0)
+
+#define INCLUDE \
+ do { \
+ if (last_skipped) \
+ COPY_INCLUDED_SECTION; \
+ ++include_len; \
+ last_skipped = FALSE; \
+ } while (0)
+
+#define COPY_INCLUDED_SECTION \
+ do { \
+ if (include_len > 0) \
+ { \
+ memmove(tgt_newend, start, include_len); \
+ tgt_newend += include_len; \
+ include_len = 0; \
+ } \
+ start = curp; \
+ } while (0)
+
+ /* Include the current character as character X.
+ If the current character already *is* X, add it to the
+ currently included region, increasing chances for consecutive
+ fully normalized blocks. */
+#define INCLUDE_AS(x) \
+ do { \
+ if (*curp == (x)) \
+ INCLUDE; \
+ else \
+ { \
+ INSERT((x)); \
+ SKIP; \
+ } \
+ } while (0)
+
+ /* Insert character X in the output buffer */
+#define INSERT(x) \
+ do { \
+ COPY_INCLUDED_SECTION; \
+ *tgt_newend++ = (x); \
+ } while (0)
+
+ for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
+ {
+ switch (*curp)
+ {
+ case '\r':
+ if (opts->ignore_eol_style)
+ INCLUDE_AS('\n');
+ else
+ INCLUDE;
+ state = svn_diff__normalize_state_cr;
+ break;
+
+ case '\n':
+ if (state == svn_diff__normalize_state_cr
+ && opts->ignore_eol_style)
+ SKIP;
+ else
+ INCLUDE;
+ state = svn_diff__normalize_state_normal;
+ break;
+
+ default:
+ if (svn_ctype_isspace(*curp)
+ && opts->ignore_space != svn_diff_file_ignore_space_none)
+ {
+ /* Whitespace but not '\r' or '\n' */
+ if (state != svn_diff__normalize_state_whitespace
+ && opts->ignore_space
+ == svn_diff_file_ignore_space_change)
+ /*### If we can postpone insertion of the space
+ until the next non-whitespace character,
+ we have a potential of reducing the number of copies:
+ If this space is followed by more spaces,
+ this will cause a block-copy.
+ If the next non-space block is considered normalized
+ *and* preceded by a space, we can take advantage of that. */
+ /* Note, the above optimization applies to 90% of the source
+ lines in our own code, since it (generally) doesn't use
+ more than one space per blank section, except for the
+ beginning of a line. */
+ INCLUDE_AS(' ');
+ else
+ SKIP;
+ state = svn_diff__normalize_state_whitespace;
+ }
+ else
+ {
+ /* Non-whitespace character, or whitespace character in
+ svn_diff_file_ignore_space_none mode. */
+ INCLUDE;
+ state = svn_diff__normalize_state_normal;
+ }
+ }
+ }
+
+ /* If we're not in whitespace, flush the last chunk of data.
+ * Note that this will work correctly when this is the last chunk of the
+ * file:
+ * * If there is an eol, it will either have been output when we entered
+ * the state_cr, or it will be output now.
+ * * If there is no eol and we're not in whitespace, then we just output
+ * everything below.
+ * * If there's no eol and we are in whitespace, we want to ignore
+ * whitespace unconditionally. */
+
+ if (*tgt == tgt_newend)
+ {
+ /* we haven't copied any data in to *tgt and our chunk consists
+ only of one block of (already normalized) data.
+ Just return the block. */
+ *tgt = (char *)start;
+ *lengthp = include_len;
+ }
+ else
+ {
+ COPY_INCLUDED_SECTION;
+ *lengthp = tgt_newend - *tgt;
+ }
+
+ *statep = state;
+
+#undef SKIP
+#undef INCLUDE
+#undef INCLUDE_AS
+#undef INSERT
+#undef COPY_INCLUDED_SECTION
+}
+
+/* Append the "no newline at end of file" marker, preceded and followed by
+ * an end-of-line sequence, to STRINGBUF.  The text is converted from
+ * UTF-8 to HEADER_ENCODING first, using SCRATCH_POOL. */
+svn_error_t *
+svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf,
+                                        const char *header_encoding,
+                                        apr_pool_t *scratch_pool)
+{
+  const char *utf8_msg = APR_EOL_STR
+                         SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR;
+  const char *encoded_msg;
+
+  SVN_ERR(svn_utf_cstring_from_utf8_ex2(&encoded_msg, utf8_msg,
+                                        header_encoding, scratch_pool));
+  svn_stringbuf_appendcstr(stringbuf, encoded_msg);
+
+  return SVN_NO_ERROR;
+}
+
+/* Write a unified-diff hunk header to OUTPUT_STREAM, using HEADER_ENCODING.
+ *
+ * The line written has the shape
+ *
+ *   DELIM -OLD_START[,OLD_LENGTH] +NEW_START[,NEW_LENGTH] DELIM[ EXTRA]
+ *
+ * followed by APR_EOL_STR, where DELIM is HUNK_DELIMITER and EXTRA is
+ * HUNK_EXTRA_CONTEXT.  A length of exactly 1 is left out, since a missing
+ * length implies 1.  EXTRA and the space before it are left out when
+ * HUNK_EXTRA_CONTEXT is NULL or empty.  SCRATCH_POOL is passed to each
+ * write call. */
+svn_error_t *
+svn_diff__unified_write_hunk_header(svn_stream_t *output_stream,
+                                    const char *header_encoding,
+                                    const char *hunk_delimiter,
+                                    apr_off_t old_start,
+                                    apr_off_t old_length,
+                                    apr_off_t new_start,
+                                    apr_off_t new_length,
+                                    const char *hunk_extra_context,
+                                    apr_pool_t *scratch_pool)
+{
+  const char *extra_text = hunk_extra_context ? hunk_extra_context : "";
+  const char *extra_separator = extra_text[0] ? " " : "";
+
+  /* Opening delimiter and the old side of the range. */
+  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
+                                      scratch_pool,
+                                      "%s -%" APR_OFF_T_FMT,
+                                      hunk_delimiter, old_start));
+  if (old_length != 1)
+    SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
+                                        scratch_pool,
+                                        ",%" APR_OFF_T_FMT, old_length));
+
+  /* The new side of the range. */
+  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
+                                      scratch_pool,
+                                      " +%" APR_OFF_T_FMT, new_start));
+  if (new_length != 1)
+    SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
+                                        scratch_pool,
+                                        ",%" APR_OFF_T_FMT, new_length));
+
+  /* Closing delimiter, optional extra context, end of line. */
+  return svn_error_trace(
+           svn_stream_printf_from_utf8(output_stream, header_encoding,
+                                       scratch_pool,
+                                       " %s%s%s" APR_EOL_STR,
+                                       hunk_delimiter,
+                                       extra_separator,
+                                       extra_text));
+}
+
+/* Write the two-line unidiff file header to OUTPUT_STREAM, using
+ * HEADER_ENCODING: a "--- " line carrying OLD_HEADER and a "+++ " line
+ * carrying NEW_HEADER, each ended by APR_EOL_STR.  SCRATCH_POOL is passed
+ * to the write call. */
+svn_error_t *
+svn_diff__unidiff_write_header(svn_stream_t *output_stream,
+                               const char *header_encoding,
+                               const char *old_header,
+                               const char *new_header,
+                               apr_pool_t *scratch_pool)
+{
+  return svn_error_trace(
+           svn_stream_printf_from_utf8(output_stream, header_encoding,
+                                       scratch_pool,
+                                       "--- %s" APR_EOL_STR
+                                       "+++ %s" APR_EOL_STR,
+                                       old_header, new_header));
+}
+
+/* A helper function for display_mergeinfo_diff.  Write one line to
+   OUTSTREAM, using ENCODING, for every entry of CHANGES, a hash whose keys
+   are merge source paths and whose values are rangelists.  LINE_FMT is a
+   printf-style format that receives, in this order, the source path, the
+   rangelist rendered as a string, and APR_EOL_STR.  The hash iterator is
+   allocated in POOL; ITERPOOL is cleared at the start of every iteration
+   and used for the per-line allocations. */
+static svn_error_t *
+display_mergeinfo_changes(apr_hash_t *changes,
+                          const char *line_fmt,
+                          const char *encoding,
+                          svn_stream_t *outstream,
+                          apr_pool_t *pool,
+                          apr_pool_t *iterpool)
+{
+  apr_hash_index_t *hi = apr_hash_first(pool, changes);
+
+  while (hi)
+    {
+      const char *source_path = svn__apr_hash_index_key(hi);
+      svn_rangelist_t *ranges = svn__apr_hash_index_val(hi);
+      svn_string_t *ranges_str;
+
+      svn_pool_clear(iterpool);
+      SVN_ERR(svn_rangelist_to_string(&ranges_str, ranges, iterpool));
+      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
+                                          line_fmt,
+                                          source_path, ranges_str->data,
+                                          APR_EOL_STR));
+
+      hi = apr_hash_next(hi);
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* A helper function for svn_diff__display_prop_diffs.  Output the
+   differences between the mergeinfo stored in OLD_MERGEINFO_VAL and
+   NEW_MERGEINFO_VAL in a human-readable form to OUTSTREAM, using ENCODING.
+   Either value may be NULL, in which case that side is passed to the
+   mergeinfo diff as a NULL hash.  Removed ranges are listed first, as
+   "Reverse-merged" lines, followed by added ranges as "Merged" lines.
+   Use POOL for temporary allocations. */
+static svn_error_t *
+display_mergeinfo_diff(const char *old_mergeinfo_val,
+                       const char *new_mergeinfo_val,
+                       const char *encoding,
+                       svn_stream_t *outstream,
+                       apr_pool_t *pool)
+{
+  apr_hash_t *old_mergeinfo_hash = NULL;
+  apr_hash_t *new_mergeinfo_hash = NULL;
+  apr_hash_t *added;
+  apr_hash_t *deleted;
+  apr_pool_t *iterpool = svn_pool_create(pool);
+
+  /* Parse whichever sides are present. */
+  if (old_mergeinfo_val)
+    SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool));
+  if (new_mergeinfo_val)
+    SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool));
+
+  SVN_ERR(svn_mergeinfo_diff2(&deleted, &added,
+                              old_mergeinfo_hash, new_mergeinfo_hash,
+                              TRUE, pool, pool));
+
+  SVN_ERR(display_mergeinfo_changes(deleted,
+                                    _(" Reverse-merged %s:r%s%s"),
+                                    encoding, outstream, pool, iterpool));
+  SVN_ERR(display_mergeinfo_changes(added,
+                                    _(" Merged %s:r%s%s"),
+                                    encoding, outstream, pool, iterpool));
+
+  svn_pool_destroy(iterpool);
+  return SVN_NO_ERROR;
+}
+
+/* Write the property changes in PROPCHANGES (an array of svn_prop_t) to
+ * OUTSTREAM, using ENCODING.
+ *
+ * ORIGINAL_PROPS, when not NULL, is looked up by property name to find the
+ * value each property had before the change.  Every reported change starts
+ * with an "Added: ", "Deleted: " or "Modified: " line naming the property.
+ * If PRETTY_PRINT_MERGEINFO is set, changes to SVN_PROP_MERGEINFO are then
+ * rendered by display_mergeinfo_diff(); all other changes, and mergeinfo
+ * that fails to parse, are rendered as a header-less unified diff that
+ * uses "##" as the hunk delimiter.  POOL is the parent of the per-change
+ * scratch pool. */
+svn_error_t *
+svn_diff__display_prop_diffs(svn_stream_t *outstream,
+                             const char *encoding,
+                             const apr_array_header_t *propchanges,
+                             apr_hash_t *original_props,
+                             svn_boolean_t pretty_print_mergeinfo,
+                             apr_pool_t *pool)
+{
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  int idx;
+
+  for (idx = 0; idx < propchanges->nelts; idx++)
+    {
+      const svn_prop_t *change
+        = &APR_ARRAY_IDX(propchanges, idx, svn_prop_t);
+      const svn_string_t *old_value = NULL;
+      const svn_string_t *new_value = change->value;
+      const svn_string_t *diff_old;
+      const svn_string_t *diff_new;
+      svn_diff_file_options_t diff_options = { 0 };
+      svn_diff_t *diff;
+      const char *action;
+
+      if (original_props)
+        old_value = svn_hash_gets(original_props, change->name);
+
+      /* Nothing to report when the property is absent on both sides, or
+         present on both sides with equal values.  This can happen if the
+         client is talking to an old mod_dav_svn server that doesn't
+         understand the "send-all" REPORT style. */
+      if (! old_value && ! new_value)
+        continue;
+      if (old_value && new_value
+          && svn_string_compare(old_value, new_value))
+        continue;
+
+      svn_pool_clear(iterpool);
+
+      action = ! old_value ? "Added"
+                           : ! new_value ? "Deleted" : "Modified";
+      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
+                                          "%s: %s%s", action,
+                                          change->name, APR_EOL_STR));
+
+      if (pretty_print_mergeinfo
+          && strcmp(change->name, SVN_PROP_MERGEINFO) == 0)
+        {
+          const char *old_text = old_value ? old_value->data : NULL;
+          const char *new_text = new_value ? new_value->data : NULL;
+          svn_error_t *err = display_mergeinfo_diff(old_text, new_text,
+                                                    encoding, outstream,
+                                                    iterpool);
+
+          /* Pretty-printing succeeded: this property is done. */
+          if (err == SVN_NO_ERROR)
+            continue;
+
+          /* Issue #3896: invalid mergeinfo must not make the whole diff
+             fail.  Only a parse error is tolerated; in that case fall
+             through and show the change like any other property. */
+          if (err->apr_err != SVN_ERR_MERGEINFO_PARSE_ERROR)
+            return svn_error_trace(err);
+          svn_error_clear(err);
+        }
+
+      /* A missing side is diffed as an empty string. */
+      diff_old = old_value ? old_value : svn_string_create_empty(iterpool);
+      diff_new = new_value ? new_value : svn_string_create_empty(iterpool);
+
+      SVN_ERR(svn_diff_mem_string_diff(&diff, diff_old, diff_new,
+                                       &diff_options, iterpool));
+
+      /* UNIX patch tries to apply a diff even when the diff header is
+       * missing, and asks the user for a target filename if it cannot
+       * work one out.  There is usually no file a property diff could be
+       * applied to, so property diffs use "##" rather than "@@" as the
+       * hunk delimiter, and the diff header is suppressed as well. */
+      SVN_ERR(svn_diff_mem_string_output_unified2(
+                outstream, diff, FALSE /* no header */, "##", NULL, NULL,
+                encoding, diff_old, diff_new, iterpool));
+    }
+
+  svn_pool_destroy(iterpool);
+  return SVN_NO_ERROR;
+}
+
+
+/* Return the library version number.
+ *
+ * The whole function body is supplied by the SVN_VERSION_BODY macro, whose
+ * definition is not visible in this part of the file.  NOTE(review):
+ * presumably it returns a pointer to a statically allocated version
+ * structure -- confirm against the macro's definition. */
+const svn_version_t *
+svn_diff_version(void)
+{
+ SVN_VERSION_BODY;
+}