001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.fileupload2.core; 018 019import java.io.IOException; 020import java.nio.charset.Charset; 021import java.nio.charset.StandardCharsets; 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Locale; 026import java.util.Map; 027import java.util.Objects; 028 029import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder; 030import org.apache.commons.io.IOUtils; 031 032/** 033 * High level API for processing file uploads. 034 * <p> 035 * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by 036 * <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with 037 * a given HTML widget. 038 * </p> 039 * <p> 040 * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else. 041 * </p> 042 * 043 * @param <R> The request context type. 044 * @param <I> The FileItem type. 045 * @param <F> the FileItemFactory type. 046 */ 047public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> { 048 049 /** 050 * Boundary parameter key. 051 */ 052 private static final String BOUNDARY_KEY = "boundary"; 053 054 /** 055 * Name parameter key. 056 */ 057 private static final String NAME_KEY = "name"; 058 059 /** 060 * File name parameter key. 061 */ 062 private static final String FILENAME_KEY = "filename"; 063 064 /** 065 * Constant for HTTP POST method. 066 */ 067 private static final String POST_METHOD = "POST"; 068 069 /** 070 * Constant for HTTP PUT method. 071 */ 072 private static final String PUT_METHOD = "PUT"; 073 074 /** 075 * Constant for HTTP PATCH method. 076 */ 077 private static final String PATCH_METHOD = "PATCH"; 078 079 /** 080 * HTTP content type header name. 081 */ 082 public static final String CONTENT_TYPE = "Content-type"; 083 084 /** 085 * HTTP content disposition header name. 086 */ 087 public static final String CONTENT_DISPOSITION = "Content-disposition"; 088 089 /** 090 * HTTP content length header name. 091 */ 092 public static final String CONTENT_LENGTH = "Content-length"; 093 094 /** 095 * Content-disposition value for form data. 096 */ 097 public static final String FORM_DATA = "form-data"; 098 099 /** 100 * Content-disposition value for file attachment. 101 */ 102 public static final String ATTACHMENT = "attachment"; 103 104 /** 105 * Part of HTTP content type header. 106 */ 107 public static final String MULTIPART = "multipart/"; 108 109 /** 110 * HTTP content type header for multipart forms. 111 */ 112 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 113 114 /** 115 * HTTP content type header for multiple uploads. 116 */ 117 public static final String MULTIPART_MIXED = "multipart/mixed"; 118 119 /** 120 * Utility method that determines whether the request contains multipart content. 121 * <p> 122 * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this 123 * method is static, it is not possible to provide its replacement until this method is removed. 124 * </p> 125 * 126 * @param ctx The request context to be evaluated. Must be non-null. 127 * @return {@code true} if the request is multipart; {@code false} otherwise. 128 */ 129 public static final boolean isMultipartContent(final RequestContext ctx) { 130 final var contentType = ctx.getContentType(); 131 if (contentType == null) { 132 return false; 133 } 134 return contentType.toLowerCase(Locale.ROOT).startsWith(MULTIPART); 135 } 136 137 /** 138 * Checks if a given request method is a valid multipart request method. 139 * 140 * @param method The request method verb. 141 * @return {@code true} if the request method supports multipart request payloads; {@code false} otherwise. 142 * @since 2.0.0-M5 143 */ 144 protected static boolean isMultipartRequestMethod(final String method) { 145 return POST_METHOD.equalsIgnoreCase(method) || PUT_METHOD.equalsIgnoreCase(method) || PATCH_METHOD.equalsIgnoreCase(method); 146 } 147 148 /** 149 * The maximum size permitted for the complete request, as opposed to {@link #maxFileSize}. A value of -1 indicates no maximum. 150 */ 151 private long maxSize = -1; 152 153 /** 154 * The maximum size permitted for a single uploaded file, as opposed to {@link #maxSize}. A value of -1 indicates no maximum. 155 */ 156 private long maxFileSize = -1; 157 158 /** 159 * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum. 160 */ 161 private long maxFileCount = -1; 162 163 /** 164 * The maximum permitted size of the headers provided with a single part in bytes. 165 */ 166 private int maxPartHeaderSize = MultipartInput.DEFAULT_PART_HEADER_SIZE_MAX; 167 168 /** 169 * The content encoding to use when reading part headers. 170 */ 171 private Charset headerCharset; 172 173 /** 174 * The progress listener. 175 */ 176 private ProgressListener progressListener = ProgressListener.NOP; 177 178 /** 179 * The factory to use to create new form items. 180 */ 181 private F fileItemFactory; 182 183 /** 184 * Constructs a new instance for subclasses. 185 */ 186 public AbstractFileUpload() { 187 // empty 188 } 189 190 /** 191 * Gets the boundary from the {@code Content-type} header. 192 * 193 * @param contentType The value of the content type header from which to extract the boundary value. 194 * @return The boundary, as a byte array. 195 */ 196 public byte[] getBoundary(final String contentType) { 197 final var parser = new ParameterParser(); 198 parser.setLowerCaseNames(true); 199 // Parameter parser can handle null input 200 final var params = parser.parse(contentType, new char[] { ';', ',' }); 201 final var boundaryStr = params.get(BOUNDARY_KEY); 202 return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null; 203 } 204 205 /** 206 * Gets the field name from the {@code Content-disposition} header. 207 * 208 * @param headers A {@code Map} containing the HTTP request headers. 209 * @return The field name for the current {@code encapsulation}. 210 */ 211 public String getFieldName(final FileItemHeaders headers) { 212 return getFieldName(headers.getHeader(CONTENT_DISPOSITION)); 213 } 214 215 /** 216 * Gets the field name, which is given by the content-disposition header. 217 * 218 * @param contentDisposition The content-dispositions header value. 219 * @return The field name. 220 */ 221 private String getFieldName(final String contentDisposition) { 222 String fieldName = null; 223 if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ROOT).startsWith(FORM_DATA)) { 224 final var parser = new ParameterParser(); 225 parser.setLowerCaseNames(true); 226 // Parameter parser can handle null input 227 final var params = parser.parse(contentDisposition, ';'); 228 fieldName = params.get(NAME_KEY); 229 if (fieldName != null) { 230 fieldName = fieldName.trim(); 231 } 232 } 233 return fieldName; 234 } 235 236 /** 237 * Gets the factory class used when creating file items. 238 * 239 * @return The factory class for new file items. 240 */ 241 public F getFileItemFactory() { 242 return fileItemFactory; 243 } 244 245 /** 246 * Gets the file name from the {@code Content-disposition} header. 247 * 248 * @param headers The HTTP headers object. 249 * @return The file name for the current {@code encapsulation}. 250 */ 251 public String getFileName(final FileItemHeaders headers) { 252 return getFileName(headers.getHeader(CONTENT_DISPOSITION)); 253 } 254 255 /** 256 * Gets the given content-disposition headers file name. 257 * 258 * @param contentDisposition The content-disposition headers value. 259 * @return The file name 260 */ 261 private String getFileName(final String contentDisposition) { 262 String fileName = null; 263 if (contentDisposition != null) { 264 final var cdl = contentDisposition.toLowerCase(Locale.ROOT); 265 if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) { 266 final var parser = new ParameterParser(); 267 parser.setLowerCaseNames(true); 268 // Parameter parser can handle null input 269 final var params = parser.parse(contentDisposition, ';'); 270 if (params.containsKey(FILENAME_KEY)) { 271 fileName = params.get(FILENAME_KEY); 272 if (fileName != null) { 273 fileName = fileName.trim(); 274 } else { 275 // Even if there is no value, the parameter is present, 276 // so we return an empty file name rather than no file 277 // name. 278 fileName = ""; 279 } 280 } 281 } 282 } 283 return fileName; 284 } 285 286 /** 287 * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If 288 * that is also not specified, or {@code null}, the platform default encoding is used. 289 * 290 * @return The encoding used to read part headers. 291 */ 292 public Charset getHeaderCharset() { 293 return headerCharset; 294 } 295 296 /** 297 * Gets a file item iterator. 298 * 299 * @param request The servlet request to be parsed. 300 * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted. 301 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 302 * @throws IOException An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the 303 * uploaded content. 304 */ 305 public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException; 306 307 /** 308 * Gets an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 309 * 310 * @param requestContext The context for the request to be parsed. 311 * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted. 312 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 313 * @throws IOException An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the 314 * uploaded content. 315 */ 316 public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException { 317 return new FileItemInputIteratorImpl(this, requestContext); 318 } 319 320 /** 321 * Gets the maximum number of files allowed in a single request. 322 * 323 * @return The maximum number of files allowed in a single request. 324 */ 325 public long getMaxFileCount() { 326 return maxFileCount; 327 } 328 329 /** 330 * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getMaxSize()}. 331 * 332 * @see #setMaxFileSize(long) 333 * @return Maximum size of a single uploaded file. 334 */ 335 public long getMaxFileSize() { 336 return maxFileSize; 337 } 338 339 /** 340 * Gets the per part size limit for headers. 341 * 342 * @return The maximum size of the headers for a single part in bytes. 343 * 344 * @since 2.0.0-M5 345 */ 346 public int getMaxPartHeaderSize() { 347 return maxPartHeaderSize; 348 } 349 350 /** 351 * Gets the maximum allowed size of a complete request, as opposed to {@link #getMaxFileSize()}. 352 * 353 * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit. 354 * @see #setMaxSize(long) 355 */ 356 public long getMaxSize() { 357 return maxSize; 358 } 359 360 /** 361 * Parses the {@code header-part} and returns as key/value pairs. 362 * <p> 363 * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values. 364 * </p> 365 * 366 * @param headerPart The {@code header-part} of the current {@code encapsulation}. 367 * @return A {@code Map} containing the parsed HTTP request headers. 368 */ 369 public FileItemHeaders getParsedHeaders(final String headerPart) { 370 final var len = headerPart.length(); 371 final var headers = newFileItemHeaders(); 372 var start = 0; 373 for (;;) { 374 var end = parseEndOfLine(headerPart, start); 375 if (start == end) { 376 break; 377 } 378 final var header = new StringBuilder(headerPart.substring(start, end)); 379 start = end + 2; 380 while (start < len) { 381 var nonWs = start; 382 while (nonWs < len) { 383 final var c = headerPart.charAt(nonWs); 384 if (c != ' ' && c != '\t') { 385 break; 386 } 387 ++nonWs; 388 } 389 if (nonWs == start) { 390 break; 391 } 392 // Continuation line found 393 end = parseEndOfLine(headerPart, nonWs); 394 header.append(' ').append(headerPart, nonWs, end); 395 start = end + 2; 396 } 397 parseHeaderLine(headers, header.toString()); 398 } 399 return headers; 400 } 401 402 /** 403 * Gets the progress listener. 404 * 405 * @return The progress listener, if any, or null. 406 */ 407 public ProgressListener getProgressListener() { 408 return progressListener; 409 } 410 411 /** 412 * Creates a new instance of {@link FileItemHeaders}. 413 * 414 * @return The new instance. 415 */ 416 protected FileItemHeaders newFileItemHeaders() { 417 return AbstractFileItemBuilder.newFileItemHeaders(); 418 } 419 420 /** 421 * Skips bytes until the end of the current line. 422 * 423 * @param headerPart The headers, which are being parsed. 424 * @param end Index of the last byte, which has yet been processed. 425 * @return Index of the \r\n sequence, which indicates end of line. 426 */ 427 private int parseEndOfLine(final String headerPart, final int end) { 428 var index = end; 429 for (;;) { 430 final var offset = headerPart.indexOf('\r', index); 431 if (offset == -1 || offset + 1 >= headerPart.length()) { 432 throw new IllegalStateException("Expected headers to be terminated by an empty line."); 433 } 434 if (headerPart.charAt(offset + 1) == '\n') { 435 return offset; 436 } 437 index = offset + 1; 438 } 439 } 440 441 /** 442 * Parses the next header line. 443 * 444 * @param headers String with all headers. 445 * @param header Map where to store the current header. 446 */ 447 private void parseHeaderLine(final FileItemHeaders headers, final String header) { 448 final var colonOffset = header.indexOf(':'); 449 if (colonOffset == -1) { 450 // This header line is malformed, skip it. 451 return; 452 } 453 final var headerName = header.substring(0, colonOffset).trim(); 454 final var headerValue = header.substring(colonOffset + 1).trim(); 455 headers.addHeader(headerName, headerValue); 456 } 457 458 /** 459 * Parses an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 460 * 461 * @param request The servlet request to be parsed. 462 * @return A map of {@code FileItem} instances parsed from the request. 463 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 464 */ 465 public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException; 466 467 /** 468 * Parses an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 469 * 470 * @param ctx The context for the request to be parsed. 471 * @return A map of {@code FileItem} instances parsed from the request. 472 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 473 */ 474 public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException { 475 final var items = parseRequest(ctx); 476 final Map<String, List<I>> itemsMap = new HashMap<>(items.size()); 477 478 for (final I fileItem : items) { 479 final var fieldName = fileItem.getFieldName(); 480 final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>()); 481 mappedItems.add(fileItem); 482 } 483 484 return itemsMap; 485 } 486 487 /** 488 * Parses an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 489 * 490 * @param request The servlet request to be parsed. 491 * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted. 492 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 493 */ 494 public abstract List<I> parseRequest(R request) throws FileUploadException; 495 496 /** 497 * Parses an <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream. 498 * 499 * @param requestContext The context for the request to be parsed. 500 * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted. 501 * @throws FileUploadException if there are problems reading/parsing the request or storing files. 502 */ 503 public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException { 504 final List<I> itemList = new ArrayList<>(); 505 var successful = false; 506 try { 507 final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set."); 508 final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE]; 509 getItemIterator(requestContext).forEachRemaining(fileItemInput -> { 510 final int size = itemList.size(); 511 if (size == maxFileCount) { 512 // The next item will exceed the limit. 513 throw new FileUploadFileCountLimitException( 514 String.format("Request '%s' failed: Maximum file count %,d exceeded.", MULTIPART_FORM_DATA, Long.valueOf(maxFileCount)), 515 getMaxFileCount(), size); 516 } 517 // Don't use getName() here to prevent an InvalidFileNameException. 518 // @formatter:off 519 final var fileItem = fileItemFactory.fileItemBuilder() 520 .setFieldName(fileItemInput.getFieldName()) 521 .setContentType(fileItemInput.getContentType()) 522 .setFormField(fileItemInput.isFormField()) 523 .setFileName(fileItemInput.getName()) 524 .setFileItemHeaders(fileItemInput.getHeaders()) 525 .get(); 526 // @formatter:on 527 itemList.add(fileItem); 528 try (var inputStream = fileItemInput.getInputStream(); var outputStream = fileItem.getOutputStream()) { 529 IOUtils.copyLarge(inputStream, outputStream, buffer); 530 } catch (final FileUploadException e) { 531 throw e; 532 } catch (final IOException e) { 533 throw new FileUploadException(String.format("Request '%s' failed: %s", MULTIPART_FORM_DATA, e.getMessage()), e); 534 } 535 }); 536 successful = true; 537 return itemList; 538 } catch (final FileUploadException e) { 539 throw e; 540 } catch (final IOException e) { 541 throw new FileUploadException(e.getMessage(), e); 542 } finally { 543 if (!successful) { 544 for (final I fileItem : itemList) { 545 try { 546 fileItem.delete(); 547 } catch (final Exception ignored) { 548 // ignored TODO perhaps add to tracker delete failure list somehow? 549 } 550 } 551 } 552 } 553 } 554 555 /** 556 * Sets the factory class to use when creating file items. 557 * 558 * @param factory The factory class for new file items. 559 */ 560 public void setFileItemFactory(final F factory) { 561 this.fileItemFactory = factory; 562 } 563 564 /** 565 * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is 566 * used. If that is also not specified, or {@code null}, the platform default encoding is used. 567 * 568 * @param headerCharset The encoding used to read part headers. 569 */ 570 public void setHeaderCharset(final Charset headerCharset) { 571 this.headerCharset = headerCharset; 572 } 573 574 /** 575 * Sets the maximum number of files allowed per request. 576 * 577 * @param fileCountMax The new limit. {@code -1} means no limit. 578 */ 579 public void setMaxFileCount(final long fileCountMax) { 580 this.maxFileCount = fileCountMax; 581 } 582 583 /** 584 * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getMaxSize()}. 585 * 586 * @see #getMaxFileSize() 587 * @param fileSizeMax Maximum size of a single uploaded file. 588 */ 589 public void setMaxFileSize(final long fileSizeMax) { 590 this.maxFileSize = fileSizeMax; 591 } 592 593 /** 594 * Sets the per part size limit for headers. 595 * 596 * @param partHeaderSizeMax The maximum size of the headers in bytes. 597 * 598 * @since 2.0.0-M5 599 */ 600 public void setMaxPartHeaderSize(final int partHeaderSizeMax) { 601 this.maxPartHeaderSize = partHeaderSizeMax; 602 } 603 604 /** 605 * Sets the maximum allowed size of a complete request, as opposed to {@link #setMaxFileSize(long)}. 606 * 607 * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit. 608 * @see #getMaxSize() 609 */ 610 public void setMaxSize(final long sizeMax) { 611 this.maxSize = sizeMax; 612 } 613 614 /** 615 * Sets the progress listener. 616 * 617 * @param progressListener The progress listener, if any. Defaults to null. 618 */ 619 public void setProgressListener(final ProgressListener progressListener) { 620 this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP; 621 } 622 623}